File: dry.py

package info (click to toggle)
autodock-vina 1.2.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 116,952 kB
  • sloc: cpp: 8,029; python: 3,325; sh: 70; makefile: 50
file content (1377 lines) | stat: -rwxr-xr-x 43,693 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
#!/usr/bin/env python



""" 
Identify water molecules in close contact with a ligand in a PDB file

"""

# TODO add save best cluster option

# v.1.1 Adding support for normalizing the score by water molecules

from sys import argv
from numpy import sqrt, array
import os, getopt


#name  = argv[1].split(".")[0]

SOFTENING = .5 # softening value used in the AutoGrid maps
DEBUG = False
CUTOFF = 3.5 + SOFTENING# Angstrom. Maximum value for water interaction [ tested values : 3.15 3.51@1cbs, 3.25 1UY9; 3.5 2wi4 ]
CUTOFF = 4

DEFAULT_PENALTY = 0.3 # kcal/mol per W atom
OVERLAP_DISTANCE = 1. # angstroms to consider two W atoms overlapping for averaging
CLASH = 2.4
CLASH = 1.9
CLASH = 2.03
buffer = 5.
# TODO 
# TODO make the cutoffs agnostic of the absolute value,
# TODO but use the weight of the maps..
# TODO 
#
#
# TODO add a minimum-search protocol, for placing the water at the best spot
# TODO nearby to do a local search refinement... [ to do in AutoDock? ]

WAFFINITY_CUTOFF = -0.35 # map value to keep waters # 1efy:0.35
WAFFINITY_STRONG = -0.5 
WAFFINITY_WEAK   = -0.3

MATCH_DISTANCE = 2.0 # 1.5 + SOFTENING # to be the same as overlap_distance?

QUIET = False

lig_lig = []
lone = []
lig_rec = []

LOG=[]

cluster_number = 1
# accepted kwords for the input DLG
# INPUT:  docking.dlg [ receptor.pdbqt ]
#            if no receptor is provided, only water-water and water-ligand overlaps will be considered

# OUTPUT: cleaned docking.dlg (???)


def dist(firstline, secondline):  
    # INFO   : calculate the atomic distance between two PDB atom lines
    # INPUT  : two pdb lines
    # OUTPUT : a pdb line
    #coord1=[]
    #coord2=[]
    #temp=firstline[28:55]
    #coord1=temp.split()
    #temp=secondline[28:55]
    #coord2=temp.split()
    # floating the strings
    #for index in range(len(coord1)):
    #    coord1[index]=float(coord1[index])
    #    coord2[index]=float(coord2[index])
    coord1 = coord(firstline)
    coord2 = coord(secondline)
    measure=sqrt((coord1[0]-coord2[0])**2+(coord1[1]-coord2[1])**2+(coord1[2]-coord2[2])**2)
    return measure

def pc(v, tot):
    return ( (float(v)/float(tot)) * 100)

def get_lines(filename):
    f = open(filename, 'r')
    lines = f.readlines()
    f.close()
    return lines

"""
def get_coords(filename = None, list = None, include_hydrogens = True): # TODO DISABLED
    " "" 
    designed to extract receptor coordinates from a PDB(QT)
    but it can be used with lists too.
    return [ text_pdb_atom_entries, numeric_array_coords, atype]
    " ""
    coord = []
    atoms = []
    atype = []
    if not filename and not list:
        return False
    if filename:
        source = get_lines(filename)
    if list:
        source = list
    for l in source:
        if l.startswith("ATOM") or l.startswith("HETATM"):
            at = l.rsplit(None, 1)[1]
            if not at == "HD" or include_hydrogens: # by default, HD are included
                coord.append([float(l[30:38]),float(l[38:46]),float(l[46:54])])
                atoms.append(l)
                atype.append(at) 
    return { 'text' : atoms, 'coord' : array( coord, 'f'), 'atype': atype }
"""

def coord(l):
    return [float(l[30:38]),float(l[38:46]),float(l[46:54])]


def parse_dlg (filename, raw_output = False):
    # INPUT : DLG file (or pdbqt?)
    # OUTPUT: pdb-compliant lines
    # OPTIONS: raw_output (True/False) get raw "DOCKED: " lines or the clustered lines

    file = open(filename, 'r')
    dlg = file.readlines()
    file.close()

    accepted_kw = [ "MODEL",
        "USER",
        "ATOM",
        "HETATM",
        "TER",
        "ENDMDL"
        ]
    raw_keywords = ["DOCKED:", "INPUT-PDBQ:"]

    output = []
    for line in dlg:
        line = line.strip()
        if line:
            try:
                kw = line.split()[0]
            except:
                pass
            if not raw_output:
                if kw in accepted_kw:
                    output.append(line)
            else:
                if kw in raw_keywords:
                    output.append( line.split(": ", 1)[1])
    return output

def parse_pdbqt (filename):
    # INPUT : DLG file (or pdbqt?)
    # OUTPUT: pdb-compliant lines
    # 
    # TODO add the flexible residues support

    fp = open(filename, 'r')
    dlg = fp.readlines()
    fp.close()

    accepted_kw = [ "MODEL",
        "USER",
        "ATOM",
        "HETATM",
        "TER",
        "ENDMDL",
        "REMARK",
        'ROOT',
        'ENDROOT',
        'BRANCH',
        'ENDBRANCH'
        'TORSDOF',
        ]

    output = []
    for line in dlg:
        try:
            kw = line.split()[0]
        except:
            pass #print line
        if kw in accepted_kw:
            output.append(line)
    return output

def get_lelc(filename, cluster_number = None):
    # get clustering information from a DLG
    # histogram graph.
    biggest = 0
    fp = open(filename, 'r')
    dlg = fp.readlines()
    fp.close()
    IN = False
    current_cluster = 0
    for line in dlg:
        if "_____|___________|_____|___________|_____|______________________________________" in line:
            IN = False

        if IN:
            tmp = line.split("|")
            cluster_size = int(tmp[4])
            current_cluster += 1
            if cluster_number and current_cluster == cluster_number:
                return [ int(tmp(0)), cluster_size, tmp[5] ] # TODO TO TEST!
            elif cluster_size > biggest:
                biggest = cluster_size
                cluster = int(tmp[0])
                run = int(tmp[2])
        else:
            if "RANKING" in line:
                tmp = line.split()
                if int(tmp[0]) == cluster and int(tmp[1]) == 1: # pick the subrank 1 of the most populated cluster
                    rmsd = tmp[5]
                    return [cluster, biggest, rmsd, run]

        if "_____|___________|_____|___________|_____|____:____|____:____|____:____|____:___" in line:
            IN = True




def parse_models(structure):
    models = []
    buffer = []

    for line in structure:
        if not "ENDMDL" in line:
            buffer.append(line)
        else:
            buffer.append(line)
            models.append(buffer)
            buffer = []

    if models:
        if not QUIET: print(" [ %d docking pose(s) found ] " % len(models), end=' ')

        return models
    else:
        if not QUIET: print("\t\t\t\t [ NO DOCKING POSES HAVE BEEN FOUND!!!! ] ")
        return False




def minmax(models):
    # INPUT : pdb-like line
    # OUTPUT: coordinates (x,y,z), (X,Y,Z)
    """ Return max/min coordinates of
    the box including the given atoms list"""

    x_min, y_min, z_min =  999999999,  9999999999,  9999999999
    x_max, y_max, z_max = -999999999, -9999999999, -9999999999
    for pose in models:
        for line in pose:
            if line[0:4] == "ATOM" or line[0:6] == "HETATM":
                atom = coord(line)
                # min
                if atom[0] < x_min:
                    x_min = atom[0]
                if atom[1] < y_min:
                    y_min = atom[1]
                if atom[2] < z_min:
                    z_min = atom[2]
                # max
                if atom[0] > x_max:
                    x_max = atom[0]
                if atom[1] > y_max:
                    y_max = atom[1]
                if atom[2] > z_max:
                    z_max = atom[2]
    return [x_min, y_min, z_min], [x_max, y_max, z_max]


def load_receptor(filename, filter = True):
    file = open(filename, 'r')
    stack = file.readlines()
    file.close()
    if filter:
        output = []
        for line in stack:
            if line[0:4] == "ATOM" or line[0:6] == "HETATM":
                output.append(line)
        return output
    else:
        return stack

def in_the_box(atom_list, MIN, MAX):
    # INPUT : pdb-like atom, MIN = [x,y,z], MAX = [x,y,z]
    # OUTPUT: True/False

    good = []
    for atom in atom_list:
        pos = coord(atom)
        if pos[0] < MAX[0]+buffer and pos[0] > MIN[0]-buffer:
            if pos[1] < MAX[1]+buffer and pos[1] > MIN[1]-buffer:
                if pos[2] < MAX[2]+buffer and pos[2] > MIN[2]-buffer:
                    good.append(atom)
    if good:
        return good
    else:
        return False

    
def atom_type(line):
    # make it to work with both PDB and PDBQT
    atype = ""
    line = line.strip()
    value = line.split()[2]
    value = line.split()[-1]
    for c in value:
        if c.isalpha():
            atype += c
    return value


def pdb_atom_type(line):
    atype = ""
    tmp = line[12:16].strip()
    for c in tmp:
        if c.isalpha():
            atype+= c
    return atype

def pdbqt_atom_type(line):
    return line.split()[-1].strip()


### TODO create a function to extract input ligand and corresponding atom type
def input_ligand_pdbqt(dlg):
    ligand = []
    atypes = []
    for line in dlg:
        if line[0:18] == "INPUT-LIGAND-PDBQT":
            line = line.split(None, 1)[1]
            ligand.append(line)
            splitting = line.split()
            if splitting[0] == "ATOM" or splitting[0] == "HETATM":
                atypes.append(splitting[-1])
    return ligand, atypes



def selfoverlap(model, cutoff = CLASH):
    global LOG
    # remove ligand-overlapping w-atoms
    # INPUT : pdb model
    # OUTPUT: cleaned pdb model
    count = 0
    h_cutoff = cutoff - 0.7 # 0.7 first value; 0.6
    for pose in model:
        waters = []
        atoms  = []
        lig_lig.append("MODEL\n")
        idx = model.index(pose)
        for entry in pose:
            if entry[0:4] == "ATOM" or entry[0:6] == "HETATM":
                if pdb_atom_type(entry) == "W":
                    waters.append(entry)
                else:
                    atoms.append(entry)
        if DEBUG:
            LOG.append("MODEL\nREMARK LIGAND_OVERLAP_WATERS [ pose %d ]\n###################\n" % (idx))
        for w in waters:
            try:
                for a in atoms:
                    if not pdb_atom_type(a) == "H" and not pdbqt_atom_type(a)[0] == "H":
                        distance = dist(w,a)
                        if distance < cutoff:
                            count += 1
                            lig_lig.append(w)
                            pose.remove(w)
                            if DEBUG:
                                LOG.append("REMARK water/ligand_atom")
                                LOG.append(w)
                                LOG.append(a)
                                LOG.append("REMARK dist: "+str(distance)+" [max : "+str(cutoff)+" ]"+atom_type(a)+"\n\n")
                            raise
            except:
                pass
        if DEBUG: LOG.append("ENDMDL")
        lig_lig.append("ENDMDL\n")
        model[idx] = pose
    if not QUIET: print(" [ %d water(s) removed ]" % count)
    return model


def get_waters(pdbqt_poses, type = "W"):
    waters = []
    for p in pdbqt_poses:
        for a in p:
            if a[0:4] == "ATOM" or a[0:6] == "HETATM":
                if pdbqt_atom_type(a) == type:
                    waters.append(a)
    if not QUIET: print(" [ filtered pose contains %d waters ]" % len(waters))
    return waters


def targetoverlap(model, target, cutoff = CLASH):
    global LOG
    # INPUT : pdb lig (multi)model and pdb target model
    # OUTPUT: cleaned pdb lig model
    h_cutoff = cutoff - 0.4 # TODO decrease to 0.4? keep 0.7?
    h_cutoff = 0 # TODO CHECK THIS EXCLUDING HD!!!
    count = 0
    for pose in model:
        lig_rec.append("MODEL\n")
        waters = []
        idx = model.index(pose)
        for entry in pose:
            if entry[0:4] == "ATOM" or entry[0:6] == "HETATM":
                if pdb_atom_type(entry) == "W":
                    waters.append(entry)
        if DEBUG:
            LOG.append("MODEL\nREMARK TARGET_OVERLAP_WATERS [ pose %d ]\n###################\n" % (idx))
        for w in waters:
            try:
                for a in target:
                    if not pdbqt_atom_type(a) == "HD":
                        max = cutoff
        
                        if dist(w, a) <= max:
                            
                            lig_rec.append(w)
                            lig_rec.append(a)
                            pose.remove(w)
                            count += 1
                            if DEBUG:
                                print("water kicked out:", w, a)
                                LOG.append("REMARK water/receptor_atom")
                                LOG.append(w)
                                LOG.append(a)
                                LOG.append("REMARK dist: "+str(distance)+" [max : "+str(max)+" ]"+atom_type(a)+"\n\n")
                            raise
            except:
                pass
        if DEBUG: LOG.append("ENDMDL")
        lig_rec.append("ENDMDL ")
        model[idx] = pose
    if not QUIET: print(" [ %d water(s) removed ]" % count)
    return model


def lonewaters(model, target, cutoff = CUTOFF): # tested values : 3.15 3.51@1cbs, 3.25 1UY9; 3.5 2wi4
    hcutoff = cutoff - 0.7
    count = 0
    for pose in model:
        waters = []
        mates = []
        idx = model.index(pose)
        for entry in pose:
            if entry[0:4] == "ATOM" or entry[0:6] == "HETATM":
                if atom_type(entry) == "W":
                    waters.append(entry)
        for entry in target:
            if entry[0:4] == "ATOM" or entry[0:6] == "HETATM":
                atype = entry.split()[-1]
                if atype == "NA" or atype == "OA" or atype == "HD":
                    mates.append(entry)
        
        lone.append("MODEL ")
        for w in waters:
            safe = False
            for a in mates:
                atype = a.split()[-1]
                if atype == "HD":
                    max = hcutoff
                else:
                    max = cutoff
                if dist(w, a) < max:
                    if DEBUG:
                        LOG.append("LONE_WATERS\n###[ pose"+str(idx)+"]######\nwater: "+w+"r_atm: "+a+"dist:"+str(dist(w,a)))
                    safe = True
                    break
            if not safe:
                count += 1
                lone.append(w)
                pose.remove(w)
        model[idx] = pose
        
        lone.append("ENDMDL ")
    
    if not QUIET: print("\t\t\t [ %d water(s) removed ]" % count)
    return model



def average(list):
    # INPUT : pdb atom/het lines
    # OUTPUT: average coordinates

    total = [ 0., 0., 0.]
    for atom in list:
        a_coords = coord(atom)
        total[0] += a_coords[0]
        total[1] += a_coords[1]
        total[2] += a_coords[2]

    avg = [ total[0]/len(list), total[1]/len(list), total[2]/len(list) ]
    return avg


def pdbatom(coords, kw = "ATOM  ", atype = "X", pcharge = 0.):
    # INPUT : three coordinates
    # OUTPUT: a pdb atom
    index = 99
    residue = "WAT"
    chain = 9
    line = "%s%5d %2s   %3s %1s%4d    %8.3f%8.3f%8.3f  1.00 10.00     %1.3f %1s" % (kw, index, atype, residue, chain, index, coords[0], coords[1], coords[2], pcharge, atype)
    return line


def cluster_waters(model, cutoff = OVERLAP_DISTANCE):
    # TODO decide if report the pop size as:
    #    W3+
    #   b-factor
    #   partial charge   <-

    count = 0
    for pose in model:
        waters = []
        candidates = []
        idx = model.index(pose)
        for entry in pose:
            if entry[0:4] == "ATOM" or entry[0:6] == "HETATM":
                if pdbqt_atom_type(entry) == "W":
                    waters.append(entry)
                    pose.remove(entry)
        print(" [cluster: found %d waters]" % len(waters))
    
        for w in waters:
            pop = [w]
            for mate in waters:
                if not mate == w:
                    if dist(mate, w) <= cutoff:
                        pop.append(mate)
                        waters.remove(mate)
                        waters.remove(w)
            if len(pop)> 0:
                candidates.append(pop)
    
        for pop in candidates:
            count = len(pop)
            avg = average(pop)
            avg_atom = pdbatom(avg, atype = "W", pcharge = count)
            pose.append(avg_atom)
        model[idx] = pose

    if count > 1:
        if not QUIET: print("\t\t\t [ %d clustered ]" % count)
    else:
        if not QUIET: print("\t\t\t [ none ]") 
    return model
            

def save_pdb(list, filename):
    file = open(filename, 'w')
    for i in list:
        print(i[:-1], file=file)
    file.close

    
def save_output_pdbqt(dictionary, output_filename, count = 0, nowaters = False, index = False, printrms = False):
    # NOTE output_filename is a pointer
    # count = 0 will process all the structures
    # any other value will keep going...
    output = []

    if index:
        index -=1
        model = dictionary[index]
        for item in model:
            if item[0:4] == "ATOM" or item[0:6] == "HETATM":
                atype = atom_type(item)
                if atype == "W" and nowaters:
                    pass
                else:
                    item = item[0:77]+atype
                    output.append(item)
            else:
                item = item[:-1]
                output.append(item)

    else:
        for model in dictionary:
            for item in model:
                if printrms and "RMSD from reference structure" in item:
                    print(item.split()[6])
                if item[0:4] == "ATOM" or item[0:6] == "HETATM":
                    atype = atom_type(item)
                    if atype == "W" and nowaters:
                        pass
                    else:
                        item = item[0:77]+atype
                        output.append(item)
                else:
                    item = item[:-1]
                    output.append(item)
            if not count == 0:
                break

    for line in output:
        print(line, file=output_filename)




def experimental_match( calc_list, exp_list, cutoff = MATCH_DISTANCE):
    real = len(exp_list)
    predicted = len(calc_list)
    if not QUIET: print("real/predicted", real,predicted)
    
    found = 0

    min = 9999999
    stack = []
    invented = 0
    if real > predicted:
        missed = real-predicted
    if real < predicted:
        invented = predicted-real
    for e in exp_list:
        for w in calc_list:
            if (not e in stack) and (not w in stack):
                measure = dist(w,e)
                if measure <= cutoff:
                    found += 1
                    stack.append(w)
                    stack.append(e)
                if measure < min:
                    min = measure
    missed = real-found
    success = (float(found)/float(real))*100
    return (found, success, missed, invented)



def confirm_predictions(calc_list, exp_list, cutoff = MATCH_DISTANCE):
    real = len(exp_list)
    predicted = len(calc_list)
    if DEBUG: print("confirm_predictions>real/predicted", real,predicted)
    
    found = 0
    distance_log = []
    for w in calc_list:
        for e in exp_list:
            d = dist(w,e)
            if d < cutoff:
                found += 1
                distance_log.append([w, e, d])
                break
    return [found, distance_log]

                


def getmap(data):

    spacing = float(data[3].split()[1])

    pts = data[4].split()[1:]
    for i in range(len(pts)):
        pts[i] = int(pts[i])+1

    center = data[5].split()[1:]
    for i in range(len(pts)):
        center[i] = float(center[i])

    data = data[6:]
    for i in range(len(data)):
        data[i] = float(data[i])

    step = []
    step.append(pts[0]/2 * spacing)
    step.append(pts[1]/2 * spacing)
    step.append(pts[2]/2 * spacing)

    min, max = [], []

    min.append(center[0]-step[0])
    min.append(center[1]-step[1])
    min.append(center[2]-step[2])

    max.append(center[0]+step[0])
    max.append(center[1]+step[1])
    max.append(center[2]+step[2])

    data = array(data, 'f').reshape(int(pts[2]), int(pts[1]), int(pts[0]))

    data = data.reshape(int(pts[2]), int(pts[1]), int(pts[0]))
    return { "values" : data, "spacing" : spacing, 'pts': pts, 'center' : center, 'min' : min, 'max' : max}
    


def getpoints(mapdata, coords=None, distance=None):
    data = mapdata['values']
    min = mapdata['min']
    max = mapdata['max']
    spacing = mapdata['spacing']
    pts = mapdata['pts']
    
    if not distance:
        distace = 1.0 # default 1A distance scanning

    if not coords and not distance:
        coords = [3., 4., 5.]
        distance = 1.0

    pt_scan = int( round(float(distance)/(spacing)) )
    if pt_scan == 0: pt_scan = 1 # at least one point around
    if DEBUG: print("Grid range: %2.2f [ +/- %d points : %2.2f ]" % (distance,pt_scan, pt_scan*spacing), end=' ') 
    best = 999

    if (coords[0] < max[0]) and (coords[0] > min[0]):
        if (coords[1] < max[1]) and (coords[1] > min[1]):
            if (coords[2] < max[2]) and (coords[2] > min[2]):
                z_pt = int(round((coords[2] - min[2])/spacing))
                y_pt = int(round((coords[1] - min[1])/spacing))
                x_pt = int(round((coords[0] - min[0])/spacing))

                for x_ofs in range(-pt_scan, pt_scan+1): 
                    x = x_pt + x_ofs
                    if x < 0 or x >= pts[0]:
                        break
                    for y_ofs in range(-pt_scan, pt_scan+1): 
                        y = y_pt + y_ofs
                        if y < 0 or y >= pts[1]:
                            break
                        for z_ofs in range(-pt_scan, pt_scan+1): 
                            z = z_pt + z_ofs
                            if z < 0 or z >= pts[2]:
                                break
                            if data[z,y,x] < best:
                                best = data[z,y,x]
            if DEBUG: print(" in getpoints, best: %2.2f" % best, end=' ')
            return best
    return False




def scoreCorrection(atoms, atom_counts, penalty):
    """ ugly function, just temporary for helping Yang.
        To be rewritten in a more clean way...

        it works only with PDBQT+ structures!
    """
    #penalty = 0.3 # same as the entropy?
    heavy = atom_counts[0]
    waters = atom_counts[1]
    penalty = waters * penalty
    if not QUIET: print(" energy correction (penalty = %2.3f per %d W atom) " % (penalty,waters))
    if waters == 0: return atoms

    hpattern = "USER    AD_histogram> "
    epattern = "USER    AD_LE> "
    cpattern = "USER    AD_LC> "
    LONG=False
    for i in range(len(atoms)):
        l = atoms[i]
        #print l
        # histogram
        if hpattern in l:
            histo = l.split(hpattern)[1]
            histo = histo.split(",")
            for j in range(len(histo)):
                e,bulk = histo[j].split(":",1)
                histo[j] = "%2.3f:%s" % (float(e)-penalty, bulk)
            correct = hpattern
            for h in histo:
                correct += h
                correct += ","
            correct = correct[:-1]+"\n"
            atoms[i] = correct
        # le
        elif epattern in l:
            info = l.split(epattern)[1]
            # USER    AD_LE> -9.310,	-0.776,	1,	100.00

            info = info.split(",",2)
            e_org = float(info[0])
            leff_org = float(info[1])
            e = e_org + penalty
            leff = e/heavy
            correct = epattern
            correct += " %2.3f," % e
            correct += " %2.3f," % leff
            correct += info[2]
            atoms[i] = correct
        # lc
        elif cpattern in l:
            LONG=True

            info = l.split(cpattern)[1]
            # USER    AD_LC> -9.310,	-0.776,	1,	100.00
            info = info.split(",",2)
            ec_org = float(info[0])
            leffc_org = float(info[1])
            ec = ec_org + penalty
            leffc = ec/heavy
            correct = epattern
            correct += " %2.3f," % ec
            correct += " %2.3f," % leffc
            correct += info[2]
            atoms[i] = correct


    if not QUIET:
        print("\t(LE) energy            : %2.3f \t[org: %2.3f ]" % (e, e_org))
        print("\t     ligand efficiency : %2.3f \t[org: %2.3f ]" % (leff, leff_org))

        if LONG:
            print("\t(LC) energy            : %2.3f \t[org: %2.3f ]" % (ec, ec_org))
            print("\t     ligand efficiency : %2.3f \t[org: %2.3f ]" % (leffc, leffc_org))
    else:
        print("%s,%2.3f,%2.3f" % (input_file,e,leff), end=' ')
        if LONG:
            print(",%2.3f,%2.3f" % (ec,leffc))
        else:
            print("\n")
            
    return atoms



def countWatoms(madels,excludeH = True):
    w = 0
    a = 0
    for l in models[0]:
        if l.startswith("ATOM") or l.startswith("HETATM"):
            if l.split()[-1] == "W": w+=1
            else: 
                if not "HD" in l.split()[-1]:  a+=1
    return a,w
            




def usage():
    myname =  os.path.basename(argv[0])

    print("""\n
                                             ,---,  
                                          ,`--.' |  
                ,---,                     |   :  :  
              .'  .' `\                   '   '  ;  
            ,---.'     \   __  ,-.        |   |  |  
            |   |  .`\  |,' ,'/ /|        '   :  ;  
            :   : |  '  |'  | |' |   .--, |   |  '  
            |   ' '  ;  :|  |   ,' /_ ./| '   :  |  
            '   | ;  .  |'  :  /, ' , ' : ;   |  ;  
            |   | :  |  '|  | '/___/ \: | `---'. |  
            '   : | /  ; ;  : | .  \  ' |  `--..`;  
            |   | '` ,/  |  , ;  \  ;   : .--,_     
            ;   :  .'     ---'    \  \  ; |    |`.  
            |   ,.'                :  \  \`-- -`, ; 
            '---'                   \  ' ;  '---`"  
                                     `--`           

    """)

    print("\tUSAGE :\t%s [options] -i input.[dlg|pdbqt] \n" % myname)
    print("\t     -i\t\tinput file [ required ]. Accepted file formats are '.dlg', '.pdbqt' (including pdbqt+)\n")

    print("\tOptions")
    print("\t-------")
    print("\t    -F\t\tfull mode (write also the DRY,STRONG,WEAK files to cope with ADT limitations)")
    print("\t    -E\t\textract only the lowest energy structure, strip all waters (if present) and save the file")
    print("\t    -c\t\textract the lowest energy-largest cluster result ( DLG input only )")
    print("\t    -C\t\textract the lowest energy-largest cluster result, strip all waters (if present) and save the file (DLG input only)")
    print("\t    -p [integer]\t\textract the selected cluster number")

    print("\t    -n [float]\t\tnormalize the energy by using this value per W atoms [ default : %2.3f ]\n" % (DEFAULT_PENALTY))
    print("\tStructural input")
    print("\t----------------")
    print("\t    -r rec.pdbqt\tuse the receptor PDBQT file")
    print("\t    -w waters.pdb\tuse the experimental waters in the file to calculate distances and success rate")

    print("\n\tW-atoms scoring & cleaning")
    print("\t--------------------------")
    print("\t    Maps")
    print("\t    -m protein.X.map\tuse map file to decide wich waters are kept")
    print("\t    -x float\tmap affinity cutoff [default %2.2f ]" % WAFFINITY_CUTOFF)
#    print "\t    Distance"
#    print "\t    -M [float]\tmerge W-atoms that are closer than the distance [ default : %1.2f Angstrom ]" % OVERLAP_DISTANCE


    print("\t    -q\t\tquiet mode. Only dir name and the RMSD are printed out.")
    print("\t    -d\t\tsave the \"DEBUG_*\" files\n")

    print("\t\tIf the receptor.pdbqt is omitted, only self-overlapping ligand waters will be removed")
    print("\t\tBy default, the following filters are performed:")
    print("\t\t\t - ligand-atoms overlapping waters ")
    print("\t\t\t - hanging waters (not interacting with any protein atoms)")
    print("\t\t\t - off-waters (waters overlapping with receptor atoms [if provided])")
    print("\t\t\t - clustering of close waters (DISABLED!)")

    print("\n\tReference  ")
    print("\t----------")
    print("\t     Please cite:  Stefano Forli and Arthur J. Olson, J. Med. Chem., 2012, 55 (2), pp 623-638")
    print("\t                   DOI: 10.1021/jm2005145")
    
    print("\n\n")



# OPTIONS
try:
    options, structures = getopt.getopt(argv[1:], 'EhdcCqr:w:m:M:x:p:n:Fi:')
except getopt.GetoptError as err:
 usage()
 print(str(err), "\n")
 exit(2)

opts = {} # create the option dictonary with {option: argument} format

for o, a in options: # populate the options
 opts[o] = a

if '-i' in opts:
    input_file = opts['-i']
else:
    print("ERROR! Input file ('-i') is required!")
    usage()
    exit(0)


if '-h' in opts: # or '--help' in opts:
 usage()
 exit(0)

if '-q' in opts:
    QUIET = True

if '-F' in opts: FULL = True
else: FULL = False

if '-d' in opts:
  print("\n\n\t[ Debugging mode is *ON* ]\n\n")
  DEBUG = True

if not QUIET: 
    print("""
                  ____                      
                 /\  _`\                    
                 \ \ \/\ \  _ __  __  __    
                  \ \ \ \ \/\`'__\\\\ \/\ \   
                   \ \ \_\ \ \ \/\ \ \_\ \  
                    \ \____/\ \_\ \/`____ \ 
                     \/___/  \/_/  `/___/> \\
                                      /\___/
                                      \/__/ 

    """)
    print("========================== INPUT DATA =========================")



PDBQTplus = False
try:
    #if True:
    #name, ext = os.path.splitext(structures[0])
    name, ext = os.path.splitext(input_file)
    if not ext == ".dlg" and ('-c' in opts or '-C' in opts or '-E' in opts ):
        print("ERROR: clustering-related options can be used only with DLG files")
        exit(1)

    if ext == ".dlg":
        if not QUIET: print(" [ using DLG file %s ]" % input_file) #(structures[0])
        docking = parse_dlg(input_file, raw_output = True) # TODO it must become: docking, header, footer
        if not QUIET: print("    parsing docked MODELS...", end=' ')
        models = parse_models(docking)
        if not len(models):
            print("no docking results in the file")
            exit(1)
    else:
        # TODO put a function for loading simple pdbqt
        if not QUIET: print(" importing ATOMS from ", input_file) # structures[0]
        models = [parse_pdbqt(input_file) ] # TODO it must become: docking, header, footer
        # CHECK HERE IF THERE'S A PDBQT+
        tmp = get_lines(input_file)
        for l in tmp:
            if "ADVS_result" in l:
                PDBQTplus=True
                atom_counts = countWatoms(models)
                if not QUIET: print(" [found PDBQT+ : %d atoms ; %d waters ]" % (atom_counts))
                break
        if not len(models):
            print("no docking results in the file")
            exit(1)
        pass
    #print "\n",
except:
    #else:
    if not QUIET:
        print("ERROR: input file is missing")
        usage()
        #exit(1)
    #else:
    #    print "[missing %s]" % structures[0]
    exit(1)


if '-n' in opts:
    try:
        norm = float(opts['-n'])
        print(" normalize energy %2.3f Kcal/mol x W atom" % norm)

    except:
        print("Error in the normalization value!")
        usage()
        exit(1)

else: norm = DEFAULT_PENALTY 


if '-M' in opts:
    # cutoff for overlapping distance between W atoms
    CLUSTER = True
    try:
        OVERLAP_DISTANCE = float(opts['-M'])
    except:
        pass
else:
    CLUSTER = False
    

if '-m' in opts:
    try:
        if not QUIET: print("\n [ using map file %s ]" % (opts['-m']))
        grid = get_lines(opts['-m'])
        if '-x' in opts:
            try:
                WAFFINITY_CUTOFF = -float(opts['-x'])
            except:
                print("Error in maps cutoff value: %s" % opts['-x'])
                exit(1)
            if not QUIET: print("Affinity cutoff = ", WAFFINITY_CUTOFF)
    except:
        print("ERROR! Impossible to open map file : %s" % opts['-m'])
        exit(1)
else:
    grigrid = False



if '-w' in opts:
    if not QUIET: print("\n [ getting waters from %s ]" % opts['-w'])
    waters = []
    tmp = get_lines(opts['-w'])
    for a in tmp:
        if atom_type(a) == "O":
            waters.append(a)
else:
    waters = False


if not QUIET:
    print("===============================================================\n")



if '-E' in opts:
    if not QUIET: print("\n[ saving the lowest energy result only ]")
    output_filename = name+"_RINSE_LE.pdbqt"
    out = open(output_filename, 'w')
    pdb = os.getcwd()
    pdb = os.path.basename(pdb)
    print(pdb+","+name+",", end=' ')
    save_output_pdbqt(models, out, count = 1, nowaters = True, printrms = True)
    exit(0)



if '-C' in opts:
    out = open(name+"_RINSE_LELC.pdbqt", 'w')
    lelc = get_lelc(structures[0])
    if not len(lelc):
        print("no docking results in the file")
        exit(1)
    if not QUIET:
        print("\n[ saving the rinsed LELC result only ]")
    else:
        pdb = os.getcwd()
        pdb = os.path.basename(pdb)
        print(pdb+","+name+","+lelc[2])
    save_output_pdbqt(models, out, count = 0, nowaters = True, index=lelc[0] )
    exit(0)



# Check if we're in receptor mode
if '-r' in opts:
    try:
        rec = load_receptor(opts['-r'])
        if not QUIET: print("\n receptor structure loaded\t", end=' ')
        if not QUIET: print("\t\t [ %d atoms ]" % len(rec))
        MIN, MAX = minmax (models)
        # include only receptor atoms in the max/min ligand coordinates to speed up the computation
        rec_box = in_the_box(rec, MIN, MAX)
        if DEBUG: save_pdb(rec_box, "DEBUG_box.pdb")
        if not QUIET: print(" receptor 5A shell extracted ", end=' ')
        if not rec_box:
            print("ERROR: no receptor atoms are close to the ligand")
            exit(1)
        else:
            if not QUIET: print("\t\t\t [ %d atoms in 5 A shell ] " % len(rec_box))
        REC = True
    except:
        print("ERROR: something bad happened when reading the receptor file", rec)
        exit(1)
else:
    if not QUIET: print(" [ no receptor ]")
    REC = False


if not models:
    print("ERROR: no docking poses have been found")
    exit(1)


suffix = "" #name



#print "\n-PROCESSING THE DATA --------------------------"

### Pose extraction
if '-c' in opts:
    lelc = get_lelc(input_file)
    #lelc = get_lelc(structures[0])
    lelc_idx = lelc[3]
    if not QUIET:
        print("\n LELC docked pose\t\t\t\t [ biggest cluster: ", lelc[1], " AutoDock RMSD :", lelc[2], " Run #", lelc[3], "] \n")

    models = [ models[ lelc_idx-1 ] ]
    suffix = "_LELC"
elif '-e' in opts:
    models = [ models[0]]
    print("WARNING! NEVER TESTED!")
    suffix = "_LE"

elif '-p' in opts:
    try:
        cluster_number = int(opts['-p'])
        if not QUIET: print(" [extracting cluster # %d ]" % (cluster_number))
        models = [ models[cluster_number-1]]
        suffix = ("_pose_%d" % int(opts['-p']))

    except:
        print("ERROR! please specify a number")
        exit(1)
### Pose extraction

if not QUIET: print(" removing ligand/ligand overlapping waters\t", end=' ')
clean = selfoverlap(models)
suffix += "_DRY"

if REC:
    if not QUIET: print(" removing ligand/receptor overlapping waters\t", end=' ') 
    clean = targetoverlap(clean, rec_box)

if CLUSTER:
    if not QUIET: print(" clustering remaining waters", end=' ')
    clean = cluster_waters(clean)
    #print "CLEAN1",len(clean)

def write_this(filename, list):
    fp = open(filename, 'w')
    for a in list:
        print(a.strip(), file=fp)
    fp.close()

if grid :
    if not QUIET: print("\n scanning grid map for conserved waters...\t", end=' ') 
    grid = getmap(grid)

    watoms = get_waters(clean)
    #print "len", len(watoms)
    #print "CLEAN2", len(clean)
    conserved_waters_s = {}
    conserved_waters_w = {}

    if not QUIET: print("\n water grid score results [ map: %s ] " % (opts['-m']))
    for w in watoms:
        affinity = getpoints(grid, coord(w), distance = 1.0) # angstroms
        #WAFFINITY_STRONG = -0.45 
        #WAFFINITY_WEAK   = -0.3

        # WAFFINITY_CUTOFF = -0.35 # map value to keep waters # 1efy:0.35
        # WAFFINITY_STRONG = -0.5 
        # WAFFINITY_WEAK   = -0.3

        # strong water
        if affinity < WAFFINITY_STRONG and not (w.strip() in conserved_waters_s):
                if not QUIET: 
                    print("\t [ Water STRONG ( %2.2f ) +++ ]" % affinity)
                conserved_waters_s[w] = affinity

        # weak water
        #elif (affinity < WAFFINITY_WEAK) and not (affinity > WAFFINITY_CUTOFF) and not (w.strip() in conserved_waters_w):
        elif (affinity < WAFFINITY_WEAK) and not (w.strip() in conserved_waters_w):
                if not QUIET: print("\t [ Water  WEAK  ( %2.2f )  +  ]" % affinity)
                conserved_waters_w[w] = affinity
        # displaced water
        else:
            if not QUIET: print("\t [ Water DISPLC ( %2.2f )  D  ]" % affinity)

    if DEBUG:
        confirmed_water = open("test_maps_confirmed.pdb", 'w')
        if conserved_waters_s:
            print("MODEL\nREMARK Strong waters", file=confirmed_water)
            for w in conserved_waters_s:
                print(w.strip(), file=confirmed_water)
                print(("REMARK map affinity : %2.3f" % conserved_waters_s[w]), file=confirmed_water)
            print("ENDMDL", file=confirmed_water)
        if conserved_waters_w:
            print("MODEL\nREMARK Weak waters", file=confirmed_water)
            for w in conserved_waters_w:
                print(w.strip(), file=confirmed_water)
                print(("REMARK map affinity : %2.3f" % conserved_waters_w[w]), file=confirmed_water)
            print("ENDMDL", file=confirmed_water)
        confirmed_water.close()

    suffix += "_SCORED"

    atoms = []
    dry_version = []
    weak_w = []
    strong_w = []
    #print "NAME", name
    #print "NAME NAME NAME = ", name+suffix+'.pdbqt'
    for a in models[0]:
        if pdbqt_atom_type(a) == "W":
            if a in list(conserved_waters_s.keys()):
               atoms.append(("REMARK  STRONG water ( score: %2.2f )" % conserved_waters_s[a]))
               strong_w.append(a)
               a = a[0:55]+" 1.00 30.00           "+a[77:]
               atoms.append(a)

            elif a in list(conserved_waters_w.keys()):
               atoms.append(("REMARK  WEAK   water ( score: %2.2f )" % conserved_waters_w[a]))
               weak_w.append(a)
               a = a[0:55]+" 1.00 80.00           "+a[77:]
               atoms.append(a)
        else:
            dry_version.append(a)
            if a.startswith("ATOM") or a.startswith("HETATM"):
                a = a[0:55]+" 1.00 10.00           "+a[77:]
            atoms.append(a)
            
    output = open(name+suffix+".pdbqt", 'w')
    if not PDBQTplus:
        for a in atoms:
            output.write(a+'\n')
        output.close()
    else:
        atoms = scoreCorrection(atoms, atom_counts, norm)
        for a in atoms:
            output.write(a)
        output.close()

    if FULL:
        # option to handle ADT bad limitations...
        write_this(name+"_ADT_DRY.pdb", dry_version)
        write_this(name+"_ADT_STRONG.pdb", strong_w)
        write_this(name+"_ADT_WEAK.pdb", weak_w)
else:
    print("[ WARNING: maps are ignored in multi-poses results]")
#exit()



if waters : #and ('-c' in opts or '-E' in opts):
    print("\n Predicted VS experimental \t2.0 A    \t   2.5 A")
    print(" ========================= \t-----    \t   -----")
    if map:
        if conserved_waters_s:
            calc_list = list(conserved_waters_s.keys())
            found_2, distance_log = confirm_predictions(calc_list, waters, cutoff = MATCH_DISTANCE)
            found_2_atoms = []
            for x in distance_log:
                found_2_atoms.append(x[1])
            found_25, distance_log = confirm_predictions(calc_list, waters, cutoff = MATCH_DISTANCE+.5)
            percentage_2 =  pc(found_2, len(calc_list))
            percentage_25 = pc(found_25, len(calc_list))
            print(" Predicted STRONG  [ %d ] :\t%02.2f%% (%d)\t %2.2f%% (%d)" % (len(calc_list), percentage_2, found_2, percentage_25, found_25))
            write_this(name+"_matching_waters_STRONG.pdb", found_2_atoms)

            if DEBUG: 
                found_3, distance_log = confirm_predictions(calc_list, waters, cutoff = 3.51)
                experimental_in_3A_range_STRONG = []
                for x in distance_log:
                    experimental_in_3A_range_STRONG.append(x[1])
                write_this(name+"_DEBUG_matching_waters_3Amax_range.pdb", experimental_in_3A_range_STRONG)

        if conserved_waters_w:
            calc_list = list(conserved_waters_w.keys())
            found_2, distance_log = confirm_predictions(calc_list, waters, cutoff = MATCH_DISTANCE)
            found_2_atoms = []
            for x in distance_log:
                found_2_atoms.append(x[1])
            found_25, distance_log = confirm_predictions(calc_list, waters, cutoff = MATCH_DISTANCE+.5)
            percentage_2 =  pc(found_2, len(calc_list))
            percentage_25 = pc(found_25, len(calc_list))
            print(" Predicted  WEAK   [ %d ] :\t%02.2f%% (%d)\t %2.2f%% (%d)" % (len(calc_list), percentage_2, found_2, percentage_25, found_25))
            write_this(name+"_matching_waters_WEAK.pdb", found_2_atoms)

            if DEBUG: 
                found_3, distance_log = confirm_predictions(calc_list, waters, cutoff = 3.51)
                experimental_in_3A_range_WEAK = []
                for x in distance_log:
                    experimental_in_3A_range_WEAK.append(x[1])
                write_this(name+"_DEBUG_matching_waters_3Amax_range_WEAK.pdb", experimental_in_3A_range_STRONG)
        print("\n", end=' ')

    else:
        calc_list = get_waters(get_lines(output_filename))
        for a in get_lines(output_filename):
            if a[0:4] == "ATOM" or a[0:6] == "HETATM":
                if pdbqt_atom_type(a) == "W":
                    calc_list.append(a)
    if False:
        found_2, distance_log = confirm_predictions(calc_list, waters, cutoff = MATCH_DISTANCE)
        found_25, distance_log = confirm_predictions(calc_list, waters, cutoff = MATCH_DISTANCE+.5)
        percentage_2 =  pc(found_2, len(calc_list))
        percentage_25 = pc(found_25, len(calc_list))
        if not QUIET: 
            print("Reality percentage [2.0 A ]: %2.2f [ %d ]" % (percentage_2, found_2))
            print("Reality percentage [2.5 A ]: %2.2f [ %d ] " % (percentage_25, found_25))
        else:
            print("found_2.0A:%d(%2.2f)," % (found_2, percentage_2), end=' ')
            print("found_2.5A:%d(%2.2f)"  % (found_25, percentage_25))


if DEBUG:
    rec = open("DEBUG_rec_overlap.pdb", 'w')
    lig = open("DEBUG_lig_overlap.pdb", 'w')
    lone_lone = open("DEBUG_lone.pdb", 'w')
    log_file = open("DEBUG_log_file.log", "w")
#    print "LIG_LIG"
    for i in lig_lig:
        print(i[:-1], file=lig)

#    print "LONE"
    for i in lone:
        print(i[:-1], file=lone_lone)

#    print "REC"
    for i in lig_rec:
        print(i[:-1], file=rec)

    for i in LOG:
        print(i[:-1], file=log_file)


exit(0)



### USELESS (REMOVE)
# waters overlapping ligand atoms
# waters overlapping receptor atoms
# waters hanging in the space

### USEFUL

# waters close to any receptor acceptor atoms (NA, OA, SA)

# clustering algorithm

# waters overlapping waters ( counter for how many water are overlapping
   # average water position?