File: xpktools.py

package info (click to toggle)
python-biopython 1.64%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 44,416 kB
  • ctags: 12,472
  • sloc: python: 153,759; xml: 67,286; ansic: 9,003; sql: 1,488; makefile: 144; sh: 59
file content (231 lines) | stat: -rw-r--r-- 7,357 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# Copyright 2004 by Bob Bussell.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package. 
"""For manipulating data from nmrview .xpk peaklist files.

XpkEntry class: A class suited for handling single lines of
non-header data from an nmrview .xpk file.  This class
provides methods for extracting data by the field name
which is listed in the last line of the peaklist header.
"""

from __future__ import print_function

import sys

HEADERLEN = 6


class XpkEntry(object):
    """Entry from a .xpk file.

    Usage: XpkEntry(xpkentry,xpkheadline) where xpkentry is the line
    from an nmrview .xpk file and xpkheadline is the line from
    the header file that gives the names of the entries
    which is typcially the sixth line of the header (counting fm 1)

    Variables are accessed by either their name in the header line as in
    self.field["H1.P"] will return the H1.P entry for example.
    self.field["entrynum"] returns the line number (1st field of line)
    """
    def __init__(self, entry, headline):
        self.fields = {}  # Holds all fields from input line in a dictionary
                          # keys are data labels from the .xpk header
        datlist = entry.split()
        headlist = headline.split()

        i = 0
        for i in range(len(datlist) - 1):
            self.fields[headlist[i]] = datlist[i+1]
        i = i + 1

        try:
            self.fields["entrynum"] = datlist[0]
        except IndexError as e:
            pass


class Peaklist(object):
    """For loading an entire .xpk file.

    This class reads in an entire xpk file and returns
    Header file lines are available as attributes
    The data lines are available as a list
    """
    def __init__(self, infn):

        with open(infn, 'r') as infile:

            # Read in the header lines
            self.firstline = infile.readline().split("\012")[0]
            self.axislabels = infile.readline().split("\012")[0]
            self.dataset = infile.readline().split("\012")[0]
            self.sw = infile.readline().split("\012")[0]
            self.sf = infile.readline().split("\012")[0]
            self.datalabels = infile.readline().split("\012")[0]

            # Read in the data lines to a list
            self.data = [line.split("\012")[0] for line in infile]

    def residue_dict(self, index):
        """Generate a dictionary idexed by residue number or a nucleus.

        The nucleus should be given as the input argument in the
        same form as it appears in the xpk label line (H1, 15N for example)
        """
        maxres = -1
        minres = -1

        # Cast the data lines into the xpentry class
        self.dict = {}
        for i in range(len(self.data)):
            line = self.data[i]
            ind = XpkEntry(line, self.datalabels).fields[index + ".L"]
            key = ind.split(".")[0]

            res = int(key)

            if (maxres == -1):
                maxres = res
            if (minres == -1):
                minres = res

            maxres = max([maxres, res])
            minres = min([minres, res])

            if str(res) in self.dict:
                # Append additional data to list under same key
                templst = self.dict[str(res)]
                templst.append(line)
                self.dict[str(res)] = templst

            else:
                # This is a new residue, start a new list
                self.dict[str(res)] = [line]  # Use [] for list type

        self.dict["maxres"] = maxres
        self.dict["minres"] = minres

        return self.dict

    def write_header(self, outfn):
        with open(outfn, 'wb') as outfile:
            outfile.write(self.firstline)
            outfile.write("\012")
            outfile.write(self.axislabels)
            outfile.write("\012")
            outfile.write(self.dataset)
            outfile.write("\012")
            outfile.write(self.sw)
            outfile.write("\012")
            outfile.write(self.sf)
            outfile.write("\012")
            outfile.write(self.datalabels)
            outfile.write("\012")


def replace_entry(line, fieldn, newentry):
    """Helper function replace an entry in a string by the field number.

    No padding is implemented currently.  Spacing will change if
    the original field entry and the new field entry are of
    different lengths.
    """
    #This method depends on xpktools._find_start_entry

    start = _find_start_entry(line, fieldn)
    leng = len(line[start:].split()[0])
    newline = line[:start] + str(newentry) + line[(start+leng):]
    return newline


def _find_start_entry(line, n):
    # find the starting point character for the n'th entry in
    # a space delimited line.  n is counted starting with 1
    # The n=1 field by definition begins at the first character
    # This function is used by replace_entry

    infield = 0       # A flag that indicates that the counter is in a field

    if (n == 1):
        return 0        # Special case

    # Count the number of fields by counting spaces
    c = 1
    leng = len(line)

    # Initialize variables according to whether the first character
    #  is a space or a character
    if (line[0] == " "):
        infield = 0
        field = 0
    else:
        infield = 1
        field = 1

    while (c < leng and field < n):
        if (infield):
            if (line[c] == " " and not (line[c-1] == " ")):
                infield = 0
            else:
                if (not line[c] == " "):
                    infield = 1
                    field = field + 1

        c = c + 1

    return c - 1


def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    Give the .xpk files as argument <fn_list>.
    The data element reported in <datalabel> and the index for
    the data table is by the nucleus indicated by <keyatom>.
    """
    #TODO - Clarify this docstring, add an example?
    outlist = []

    [dict_list, label_line_list] = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = dict_list[0]["minres"]
    maxr = dict_list[0]["maxres"]

    for dictionary in dict_list:
        if (maxr < dictionary["maxres"]):
            maxr = dictionary["maxres"]
        if (minr > dictionary["minres"]):
            minr = dictionary["minres"]

    res = minr
    while res <= maxr:        # s.t. res numbers
        count = 0
        line = str(res)
        for dictionary in dict_list:      # s.t. dictionaries
            label = label_line_list[count]
            if str(res) in dictionary:
                line = line + "\t" + XpkEntry(dictionary[str(res)][0], label).fields[datalabel]
            else:
                line = line + "\t" + "*"
            count = count + 1
        line = line + "\n"
        outlist.append(line)
        res = res + 1

    return outlist


def _read_dicts(fn_list, keyatom):
    # Read multiple files into a list of residue dictionaries
    dict_list = []
    datalabel_list = []
    for fn in fn_list:
        peaklist = Peaklist(fn)
        dict = peaklist.residue_dict(keyatom)
        dict_list.append(dict)
        datalabel_list.append(peaklist.datalabels)

    return [dict_list, datalabel_list]