1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
import os.path
import Bio.PDB.mmCIF.MMCIFlex
from UserDict import UserDict
__doc__="Turn an mmCIF file into a dictionary."
class MMCIF2Dict(UserDict):
    """Dictionary holding the name/data pairs read from an mmCIF file.

    Simple name/data pairs are stored as ``name -> value``; names that
    appear inside a loop are stored as ``name -> list of values``.  The
    data block header is stored under the key ``"data_"``.  Data tokens
    that are not preceded by a name are collected in the list stored under
    the key ``None``.
    """

    # The token identifiers.  Identifiers greater than DATA (SEMICOLONS,
    # DOUBLEQUOTED, QUOTED, SIMPLE) are treated as data values; a token
    # of 0 signals end of file.
    NAME = 1
    LOOP = 2
    DATA = 3
    SEMICOLONS = 4
    DOUBLEQUOTED = 5
    QUOTED = 6
    SIMPLE = 7

    def __init__(self, filename):
        """Parse the mmCIF file *filename* into this dictionary.

        Raises IOError if *filename* is not an existing regular file.
        """
        # this dict will contain the name/data pairs
        self.data = {}
        # entry for garbage
        self.data[None] = []
        if not os.path.isfile(filename):
            raise IOError("File not found.")
        Bio.PDB.mmCIF.MMCIFlex.open_file(filename)
        try:
            self._make_mmcif_dict()
        finally:
            # Release the lexer's file even when parsing fails.
            Bio.PDB.mmCIF.MMCIFlex.close_file()

    def _make_mmcif_dict(self, get_token=None):
        """Fill ``self.data`` from a stream of (token, value) pairs.

        *get_token* is a callable taking no arguments and returning a
        ``(token, value)`` pair, with ``token == 0`` at end of input.  It
        defaults to ``Bio.PDB.mmCIF.MMCIFlex.get_token``.
        """
        if get_token is None:
            get_token = Bio.PDB.mmCIF.MMCIFlex.get_token
        # local copies
        NAME = self.NAME
        LOOP = self.LOOP
        DATA = self.DATA
        mmcif_dict = self.data
        # are we looping?
        loop_flag = 0
        # value lists of the names in the current loop
        temp_list = []
        # get first token/value pair
        token, value = get_token()
        # loop until EOF (token==0)
        while token:
            if token == NAME:
                if loop_flag:
                    # Make lists for all the names in the loop
                    while token == NAME:
                        # create a list for each name encountered in loop
                        new_list = mmcif_dict[value] = []
                        temp_list.append(new_list)
                        token, value = get_token()
                    loop_flag = 0
                    # nr of data items parsed
                    data_counter = 0
                    # index of the name the current data item belongs to
                    pos = 0
                    nr_fields = len(temp_list)
                    # Now fill all lists with the data, cycling over the names
                    while token > DATA:
                        pos = data_counter % nr_fields
                        data_counter = data_counter + 1
                        temp_list[pos].append(value)
                        token, value = get_token()
                    if pos != nr_fields - 1:
                        print("ERROR: broken name-data pair (data missing)!")
                    # The last token was not used, so
                    # don't set token to None! (this means the
                    # last parsed token goes through the loop again)
                else:
                    # simple name-data pair (no loop)
                    # so next token should be the data
                    next_token, data = get_token()
                    if next_token > DATA:
                        mmcif_dict[value] = data
                        # get next token
                        token = None
                    else:
                        print("ERROR: broken name-data pair (name-non data pair)!")
                        # The name has no data.  Hand the unused token back
                        # to the main loop so it is neither dropped nor
                        # stored as this name's value; at EOF (token 0) this
                        # also ends the loop instead of retrying forever.
                        token, value = next_token, data
            elif token == LOOP:
                loop_flag = 1
                temp_list = []
                # get next token
                token = None
            elif token == DATA:
                # split the data block header into its 5-character prefix
                # (the key) and the remainder (the value)
                mmcif_dict[value[0:5]] = value[5:]
                token = None
            else:
                # we found some complete garbage
                print("ERROR: broken name-data pair (missing name)!")
                print("%s %s" % (token, value))
                mmcif_dict[None].append(value)
                # get next token
                token = None
            if token is None:
                token, value = get_token()

    def __getitem__(self, key):
        """Return the value stored under *key* (KeyError if absent)."""
        return self.data[key]
if __name__ == "__main__":
    # Interactive browser: parse the mmCIF file named on the command line
    # and print the value(s) stored under each key the user types.
    import sys

    if len(sys.argv) != 2:
        print("Usage: python MMCIF2Dict filename.")
        # Stop here: without exactly one argument there is no file to parse.
        sys.exit(1)

    filename = sys.argv[1]
    mmcif_dict = MMCIF2Dict(filename)
    print("Now type a key ('q' to end, 'k' for a list of all keys):")
    while True:
        query = raw_input("MMCIF dictionary key ==> ")
        if query == "q":
            sys.exit()
        if query == "k":
            for key in mmcif_dict.keys():
                print(key)
            continue
        try:
            value = mmcif_dict[query]
        except KeyError:
            print("No such key found.")
            continue
        # Loop entries are lists: print one item per line.
        if isinstance(value, list):
            for item in value:
                print(item)
        else:
            print(value)
|