1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
|
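"""Fetch the name and CAS registry number of the selected molecules from the
NIST WebBook, searching by InChI, and write them below each molecule.

Author: BKR
Inspired by (and partly copied from) the "fetch from webbook" plugin :-)
Ideas: fetch all alternative names from the WebBook and/or other sources and
create a dialog to choose the desired one.
"""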
import re
try:
from urllib.request import urlopen
except ImportError:
from urllib import urlopen
import dialogs
import oasa_bridge
from main import interactors
from singleton_store import Store
# Patterns applied line by line to a downloaded WebBook page.
# Page heading; group(2) holds the molecule name.
name_re = re.compile(r'(<h1><a id="Top" name="Top">)(.*)(</a></h1>)')
# CAS list entry; group(2) holds the registry number.
cas_re = re.compile(r'(<strong>CAS Registry Number:</strong>)(.*)(</li>)')
# Link list entry; group(2) holds the link target, group(4) the link text.
stereoisomers = re.compile(r'(<li><a href=")(.*)(">)(.*)(</a></li>)')
#create inchi
def get_inchi_for_one(u):
    """Generate the InChI string for one molecule.

    Args:
        u: the molecule to convert; it is handed to
            ``interactors.check_validity`` and ``oasa_bridge.mol_to_inchi``.

    Returns:
        A tuple ``(inchi, messages)``.  ``inchi`` is the generated InChI, or
        an empty string when it could not be generated.  ``messages`` is a
        list of error strings followed by whatever warnings
        ``mol_to_inchi`` reported; it is empty when nothing went wrong.
    """
    program = Store.pm.get_preference("inchi_program_path")
    if not oasa_bridge.oasa_available:
        return '', ["oasa plugin error"]
    if not interactors.check_validity(u):
        return '', ["validity Error"]
    inchi_mol = ""
    sms = []
    try:
        inchi_mol, _key, warning = oasa_bridge.mol_to_inchi(u, program)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not turned into an "unknown error" message.
        sms = ["Unknown error occured during INChI generation, sorry", "Please, try to make sure the path to the InChI program is correct in 'Options/INChI program path'"]
        warning = []
    return inchi_mol, sms + warning
def stereoisomers_found(streamx):
    """Let the user pick one of the entries linked on a WebBook result page.

    Args:
        streamx: iterable with the lines of the result page.  Lines may be
            ``bytes`` (as read from the network) or already decoded text.

    Returns:
        The ``(name, cas, messages)`` tuple that ``download_nist`` returns
        for the chosen entry.  When the page lists no candidates the result
        is ``('', '', ["The molecule was not found"])``; when the user
        cancels the dialog or selects nothing it is ``('', '', [])``.
    """
    alt_mol = []
    for line in streamx:
        # The regular expression is a text pattern, so raw network lines
        # have to be decoded before they can be searched.
        if isinstance(line, bytes):
            line = line.decode('utf-8', 'replace')
        mol_addrm = stereoisomers.search(line)
        if not mol_addrm:
            continue
        mol = mol_addrm.group(4)
        if mol.endswith("-"): #When Nist changes the name around
            mol = turn_name_around(mol)
        alt_mol.append((mol, mol_addrm.group(2)))
    if not alt_mol:
        # Nothing to choose from; do not pop up an empty selection list.
        return '', '', ["The molecule was not found"]
    mols = [name for name, addr in alt_mol]
    #Stereoisomer selection dialog
    dialog = Pmw.Dialog(App.paper,
                        buttons=(_('OK'), _('Cancel')),
                        defaultbutton=_('OK'),
                        title=_('The molecule was not found'))
    w = Pmw.ScrolledListBox(dialog.interior(), labelpos='n', items=mols, label_text='These Stereoisomers were found')
    w.pack(expand=1, fill='both', padx=4, pady=4)
    # activate() returns the name of the pressed button, or None when the
    # window was closed by the window manager.
    pressed = dialog.activate()
    chosen = w.getvalue()
    if pressed != _('OK') or not chosen:
        return '', '', []
    for name, addr in alt_mol:
        if name == chosen[0]:
            return download_nist("http://webbook.nist.gov" + addr)
    # Always hand back a 3-tuple: the caller unpacks the result.
    return '', '', []
#Searches nist by name using inchi instead of the name
def get_name_from_webbook(inchi):
    """Look up a molecule in the NIST WebBook using its InChI as search term.

    Args:
        inchi: the InChI string to search for.

    Returns:
        Whatever ``download_nist`` returns for the search URL, i.e. a
        ``(name, cas, messages)`` tuple.
    """
    # Imported here so the block is self-contained; same Python 2/3
    # fallback style as the urlopen import at the top of the file.
    try:
        from urllib.parse import quote_plus
    except ImportError:
        from urllib import quote_plus
    # Collapse whitespace runs so each becomes a single "+" in the query.
    term = " ".join(inchi.split())
    # "=" and "," stay literal, so the URL for an ordinary InChI is the same
    # as before; every other reserved character (";", "?", "&", "#", "%", ...)
    # is now percent-encoded instead of being sent raw.
    url = "http://webbook.nist.gov/cgi/cbook.cgi?Name=%s&Units=SI" % quote_plus(term, safe="=,")
    return download_nist(url)
def download_nist(url):
    """Download a WebBook page and extract the molecule name and CAS number.

    A progress dialog is shown while the page is fetched.  If the page
    reports "No matches found", the lines are handed to
    ``stereoisomers_found`` so the user can pick one of the linked entries.

    Args:
        url: address of the WebBook page to download.

    Returns:
        A tuple ``(mol_name, cas, messages)``.  ``mol_name`` and ``cas`` are
        empty strings when they were not found; ``messages`` is a list of
        error strings and is empty on success.
    """
    dialog = dialogs.progress_dialog(App, title=_("Fetching progress"))
    dialog.update(0, top_text="Connecting to WebBook...", bottom_text=url)
    sms = []
    try:
        stream = urlopen(url)
        try:
            dialog.update(0.5, top_text="Reading the webbook...", bottom_text=url)
            raw_lines = stream.readlines()
        finally:
            # Close the connection on every path, not only on success.
            stream.close()
    except IOError:
        dialog.close()
        sms.append('Nist could not be reached.')
        sms.append('Please check your internet connection')
        return '', '', sms
    # Decode once up front so the same text lines can also be passed on to
    # stereoisomers_found; undecodable bytes are replaced, not fatal.
    lines = [raw.decode('utf-8', 'replace') if isinstance(raw, bytes) else raw
             for raw in raw_lines]
    mol_name = ''
    for line in lines:
        if line == "<h2>No matches found</h2>\n": #When stereoisomers are found
            # Close the progress dialog before the selection dialog opens.
            dialog.close()
            return stereoisomers_found(lines)
        if line == "<h1>Name Not Found</h1>\n": #When nothing is found
            break
        mol_namem = name_re.search(line)
        if mol_namem:
            mol_name = mol_namem.group(2)
        casm = cas_re.search(line)
        if casm:
            # The first CAS entry ends the search.
            dialog.close()
            return mol_name, casm.group(2), sms
    dialog.close()
    sms.append("The molecule was not found")
    return mol_name, '', sms
import Pmw
def err_mess_box(mess): #Pops up error OK-box
    """Show the given messages in an error dialog with a single OK button.

    mess -- iterable of message strings; each one is put on its own line.
    """
    text = "".join(entry + "\n" for entry in mess)
    box = Pmw.Dialog(App.paper, title='Error',
                     buttons=('OK',), defaultbutton='OK')
    label = Pmw.LabeledWidget(box.interior(), label_text=text, labelpos='n')
    label.pack(padx=4, pady=4, fill='both', expand=1)
    box.activate()
def turn_name_around(mol_name):
    """Reorder an inverted WebBook name into its natural reading order.

    The WebBook writes some names with the parent compound first, e.g.
    ``"Cyclohexane,1,2-dibromo-,cis-"``.  The name is split at the first
    comma that is not inside round brackets; the part after that comma is
    moved to the front, and a trailing cis/trans marker is moved to the
    very beginning, giving ``"cis-1,2-dibromo-Cyclohexane"``.

    Args:
        mol_name: the name as given by the WebBook.

    Returns:
        The reordered name.  A name without a comma outside brackets has
        nothing to move and is returned with only the cis/trans marker (if
        any) moved to the front.
    """
    # cis and trans are written at the end of the Nist name, with or
    # without a space after the comma.
    citra = ""
    for suffix, marker in ((",trans-", "trans-"), (", trans-", "trans-"),
                           (",cis-", "cis-"), (", cis-", "cis-")):
        if mol_name.endswith(suffix):
            mol_name = mol_name[:-len(suffix)]
            citra = marker
            break

    # Front and back are divided by the first "," that is not in a bracket.
    depth = 0
    split_at = -1
    for index, char in enumerate(mol_name):
        if char == "," and depth == 0:
            split_at = index
            break
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1

    if split_at < 0:
        # No dividing comma: there is no front part to move.
        back, front = mol_name, ""
    else:
        back, front = mol_name[:split_at], mol_name[split_at + 1:]
    # Nist has spaces in some names; startswith() is safe on an empty front.
    if front.startswith(" "):
        front = front[1:]
    return citra + front + back
# Plugin body: label every selected molecule with its WebBook name and CAS.
a = App.paper.selected_mols
err_mess = []
if len(a) == 0:
    err_mess_box(["Please select a molecule"])
else:
    for b in a:
        # Make this molecule (and its children) the only selection, so the
        # texts created below are placed relative to it.
        App.paper.unselect_all()
        App.paper.select(b)
        App.paper.select(b.children)
        # The molecule is flipped horizontally for the InChI generation and
        # flipped back right afterwards.
        App.paper.swap_sides_of_selected("horizontal")
        inchi_form, mess = get_inchi_for_one(b)
        App.paper.swap_sides_of_selected("horizontal")
        if mess:
            # Collect the problems; they are reported once after the loop.
            err_mess = err_mess + mess
        if not inchi_form:
            continue
        mol_name, cas, mess = get_name_from_webbook(inchi_form)
        err_mess = err_mess + mess
        if mol_name:
            # Sometimes Nist changes the name around,
            # e.g.: Cyclohexane,1,2-dibromo-,cis-
            corr_mol_name = (turn_name_around(mol_name)
                             if mol_name.endswith("-") else mol_name)
            t1 = App.paper.new_text(300, 300, text=corr_mol_name.strip())
            t1.draw()
            # Put the name below the current selection and select it too.
            App.paper.place_next_to_selected("b", "v", 10, t1)
            App.paper.select([t1])
        if cas:
            t2 = App.paper.new_text(300, 325, text="CAS: " + cas.strip())
            t2.draw()
            # Put the CAS number below the current selection.
            App.paper.place_next_to_selected("b", "v", 5, t2)
            App.paper.select([t2])
    if err_mess:
        err_mess_box(err_mess)
    App.paper.add_bindings()
|