File: fetch_name_from_webbook.py

package info (click to toggle)
bkchem 0.14.0~pre4%2Bgit20211228-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 29,340 kB
  • sloc: python: 46,432; xml: 909; javascript: 49; sh: 37; makefile: 16
file content (214 lines) | stat: -rw-r--r-- 7,645 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
"""Author: BKR
Copied of......hmmm... Inspired by the "fetch from webbook" plugin :-)

Ideas:      Get all alternative names from WebBook and/or other sources and
            create a dialog to choose the desired one
"""

import re

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

import dialogs
import oasa_bridge

from main import interactors
from singleton_store import Store



# search strings for the name and the cas registry number
name_re = re.compile('(<h1><a id="Top" name="Top">)(.*)(</a></h1>)')
cas_re = re.compile('(<strong>CAS Registry Number:</strong>)(.*)(</li>)')
stereoisomers = re.compile('(<li><a href=")(.*)(">)(.*)(</a></li>)')

#create inchi
def get_inchi_for_one(u):

    program = Store.pm.get_preference("inchi_program_path")
    if not oasa_bridge.oasa_available:
        return '',["oasa plugin error",]
    sms = []
    inchi_mol=""
    if not interactors.check_validity(u):
        return '',["validity Error",]
    try:
        inchi_mol,key,warning=(oasa_bridge.mol_to_inchi(u, program))
#   except oasa.oasa_exceptions.oasa_inchi_error, e:
#       sms = ["InChI generation failed,", "make sure the path to the InChI program is correct in 'Options/INChI program path'", "", str(e)]
    except:
        sms = ["Unknown error occured during INChI generation, sorry", "Please, try to make sure the path to the InChI program is correct in 'Options/INChI program path'"]
        warning = []
    sms=sms+warning
    return inchi_mol, sms

def stereoisomers_found(streamx):
    alt_mol = []
    for line in streamx:
        mol_addrm = stereoisomers.search(line)
        if mol_addrm:
            mol = mol_addrm.group(4)
            if mol[-1] == "-":          #When Nist changes the name around
                mol = turn_name_around(mol)
            addr = mol_addrm.group(2)
            alt_mol.append((mol, addr))
    mols=[]
    for a in alt_mol:
        mols.append(a[0])
    #Stereoisomer selection dialog
    dialog = Pmw.Dialog(App.paper,
                        buttons=(_('OK'), _('Cancel')),
                        defaultbutton=_('OK'),
                        title=_('The molecule was not found'))
    w = Pmw.ScrolledListBox(dialog.interior(), labelpos='n', items=mols, label_text='These Stereoisomers were found')
    w.pack(expand=1, fill='both', padx=4, pady=4)
    dialog.activate()
    for a in alt_mol:
        if a[0] == w.getvalue()[0]:
            return download_nist("http://webbook.nist.gov"+a[1])

#Searches nist by name using inchi instead of the name
def get_name_from_webbook(inchi):

    # Escape problematic characters
    searchstr = ""
    for a in inchi:
        if a == "+":
            a = "%2B"
        elif a == "/":
            a = "%2F"
        elif a == "(":
            a = "%28"
        elif a == ")":
            a = "%29"
        searchstr = searchstr + a
    url = "http://webbook.nist.gov/cgi/cbook.cgi?Name=%s&Units=SI" % ("+".join(searchstr.split()))
    return download_nist(url)


def download_nist(url):
#   print url
    dialog = dialogs.progress_dialog(App, title=_("Fetching progress"))
    dialog.update(0, top_text = "Connecting to WebBook...", bottom_text=url)
    sms=[]
    try:
        stream = urlopen(url)
    except IOError:
        dialog.close()
        sms.append('Nist could not be reached.')
        sms.append('Please check your internet connection')
        return '','', sms
    cas = ''
    mol_name = ''
    dialog.update(0.5, top_text = "Reading the webbook...", bottom_text=url)
    stream_lines = stream.readlines()
    for line in stream_lines:
        line = line.decode('utf-8')
        mol_namem = name_re.search(line)
        if line == "<h2>No matches found</h2>\n":   #When stereoisomers are found
            dialog.close()
            return stereoisomers_found(stream_lines)
        elif line == "<h1>Name Not Found</h1>\n":   #When nothing is found
            dialog.close()
            sms.append("The molecule was not found")
            return mol_name, cas, sms
        if mol_namem:
            mol_name=mol_namem.group(2)
        casm = cas_re.search(line)
        if casm:
            cas = casm.group(2)
            stream.close()
            dialog.close()
            return mol_name, cas, sms
    dialog.close()
    sms.append("The molecule was not found")
    return mol_name, cas, sms

import Pmw

def err_mess_box(mess): #Pops up error OK-box
    message=""
    for m in mess:
        message=message+m+"\n"
    dialog = Pmw.Dialog(App.paper, buttons=('OK',),
    defaultbutton='OK', title='Error')

    w = Pmw.LabeledWidget(dialog.interior(), labelpos='n', label_text=message)
    w.pack(expand=1, fill='both', padx=4, pady=4)
    dialog.activate()

def turn_name_around(mol_name):
    bra = 0     #bracket counter
    back = ""   #end of name
    front= ""   #beginning of name
    frontreached = 0
    citra = ""  #cis and trans are written at the end of the Nistname
    if mol_name[len(mol_name)-7:] == ",trans-":
        mol_name = mol_name[:len(mol_name)-7]
        citra = "trans-"
    elif mol_name[len(mol_name)-8:] == ", trans-": #Nist has spaces in some Names
        mol_name = mol_name[:len(mol_name)-8]
        citra = "trans-"
    elif mol_name[len(mol_name)-5:] == ",cis-":
        mol_name =  mol_name[:len(mol_name)-5]
        citra = "cis-"
    elif mol_name[len(mol_name)-6:] == ", cis-": #Nist has spaces in some Names
        mol_name =  mol_name[:len(mol_name)-6]
        citra = "cis-"
    for a in mol_name:
        if not frontreached:
            #the front and the back are devided by "," that is not in a bracket
            if a == "," and bra == 0:
                frontreached = 1
            else:
                back = back + a
                if a == "(":
                    bra = bra + 1
                elif a == ")":
                    bra = bra -1
        else:
            front = front + a
    if front[0] == " ": #Nist has spaces in some Names
        front = front[1:]
    mol_name = citra + front + back
    return mol_name

a=App.paper.selected_mols
err_mess=[]
if len(a)!= 0:
    for b in a:
        App.paper.unselect_all()
        App.paper.select(b)
        App.paper.select(b.children)  #select whole molekule for alignment below
        App.paper.swap_sides_of_selected("horizontal")
        inchi_form, mess = get_inchi_for_one(b)
        App.paper.swap_sides_of_selected("horizontal")
        if len(mess) != 0:      #Check for Errors
            err_mess = err_mess + mess
        if inchi_form:
            mess = []
            mol_name, cas, mess = get_name_from_webbook(inchi_form)
            err_mess = err_mess + mess
            if mol_name:        #writing Name
                #sometimes Nist changes the Name around e.g.: Cyclohexane,1,2-dibromo-,cis-
                if mol_name[-1] == "-":
                    corr_mol_name = turn_name_around(mol_name)
                else:
                    corr_mol_name = mol_name
                t1 = App.paper.new_text(300, 300, text=corr_mol_name.strip())
                t1.draw()
                App.paper.place_next_to_selected ("b","v",10,t1) #place below mol
                App.paper.select([t1])
            if cas:             #writing CAS
                t2 = App.paper.new_text(300, 325, text="CAS: "+cas.strip())
                t2.draw()
                App.paper.place_next_to_selected ("b","v",5,t2) #place below mol
                App.paper.select([t2])
    if len(err_mess)!= 0:   #check for any error messages
        err_mess_box(err_mess)
    App.paper.add_bindings()
else:
    err_mess_box(["Please select a molecule"])