File: gui.py

package info (click to toggle)
microbegps 1.0.0-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 166,876 kB
  • sloc: python: 2,786; makefile: 12
file content (1742 lines) | stat: -rw-r--r-- 73,930 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
# -*- coding: utf-8 -*-
"""
@author: lindnerm
"""

import sys, math
import pkgutil
import pkg_resources
import threading
import cPickle

from PyQt4 import QtGui, QtCore

import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt4agg import FigureCanvasQTAgg as FigureCanvas
import matplotlib_venn as venn


try:
	import microbegps.gps as gps
except ImportError:
	import gps
import taxonomy
import modules

from __version__ import __version__,__url__


#from gui.help_texts import HelpTexts as HelpTexts
# HTML help snippets, keyed by a short item-type identifier.
# Keys prefixed with "candidate" describe candidate-level columns, keys prefixed
# with "reference" describe the columns of a single supporting genome; the
# remaining keys describe individual tree entries (read lists, related genomes, ...).
# NOTE(review): the code that looks these texts up is outside this view --
# presumably they are shown in a help pane when the matching item is selected.
HelpTexts = {
"name":"<b>Name</b><br><br>The Name column displays the names of the found candidate organisms. The candidate name is either the name of the supporting genome with the highest validity, or the lowest common ancestor of all supporting genomes (if taxonomic information is available).<br><br>A Candidate can be expanded by double clicking to view its <i>supporting genomes</i>. The names of the supporting genomes are inferred from the IDs in the original Fasta files. More information about name mapping can be found in the documentation.",
"candidate":"<b>Candidate Organism</b><br><br>A Candidate organism represents an organism that is potentially present in the dataset. It delimits from the other candidates by sufficient uniquely matching reads and a compatible genome coverage. A Candidate is supported by one or more genomes providing clues about the phylogenetic origin of the Candidate.",
"candidate_reads":"<b>Candidate Reads</b><br><br>Shows the number of all reads that were assigned to this Candidate. Note that a read may have mapped to multiple Genomes supporting this Candidate.",
"candidate_unique":"<b>Candidate Unique Reads</b><br><br>Shows the number of reads that were uniquely assigned to this Candidate. Note that a read may have mapped to multiple Genomes supporting this Candidate.",
"candidate_coverage":"<b>Candidate Coverage</b><br><br>Shows the average coverage over all genomes supporting this Candidate.",
"candidate_validity":"<b>Candidate Validity</b><br><br>The Candidate validity is the maximum observed validity of all supporting genomes.",
"candidate_homogeneity":"<b>Candidate Homogeneity</b><br><br>Shows the average read distribution homogeneity. The distribution of the reads over the genome is compared to a uniform distribution with a Kolmogorov-Smirnov Test. Here, only the averag test statistic of all supporting genomes is reported. Take a look at the supporting genome homogeneity for more informative values.",
"candidate_error":"<b>Candidate Mapping Error</b><br><br>The mapping error of all reads mapping to this Candidate is averaged. If a read maps to multiple genomes, the error rates for each genome are treated individually.",
"reference":"<b>Supporting Genome</b><br><br>A genome supports the existence of a Candidate organism. A high number of unique reads and high validity in combination with low Homogeineity score and Mapping Error are a strong hint that this genome is present in the dataset.",
"reference_reads":"<b>Number of Reads</b><br><br>The total number of reads mapping to this genome. Reads removed by any previous filter step are not counted.",
"reference_unique":"<b>Number of Unique Reads</b><br><br>The number of reads mapping uniquely to this genome.",
"reference_coverage":"<b>Local Genome Coverage</b><br><br>Shows the <i>local</i> coverage on this genome (excluding regions that are not covered). Reads removed by any previous filter step are not counted.",
"reference_validity":"<b>Genome Validity</b><br><br>The validity of the genome is the fraction of the genome that is (potentially) covered by reads. A high validity indicates, that large parts of the reference genome agree with the organism found in the dataset. Note that the validity may depend on the read quality, the reference genome quality and the read mapper!",
"reference_homogeneity":"<b>Coverage Homogeneity</b><br><br>Shows the read distribution homogeneity on this genome. The distribution of the reads over the genome is compared to a uniform distribution with a Kolmogorov-Smirnov Test. Here, only the test statistic is reported. Genomes with p<0.05 are highlighted in blue.",
"reference_error":"<b>Mapping Error</b><br><br>The average mapping error of all reads mapping to this genome. If a read of length 100 has 10 errors (insertions, deletions, mutations), the error rate is 0.1.",
"all reads":"<b>All Reads</b><br><br>A list of all reads mapping to this genome. Double click to load the list of reads. Warning: this may take a while for high numbers of reads.",
"unique reads":"<b>Unique Reads</b><br><br>A list of all reads mapping uniquely to this genome. Double click to load the set of unique reads. Warning: this may take a while for high numbers of reads.",
"related":"<b>Related Genomes</b><br><br>Double click to load the set of genomes sharing reads with this genome. Genomes supporting the same Candidate as this genome are ignored. The color coding highlights genomes that share a high fraction of all reads.",
"read":"<b>Mapped Read</b><br><br>A read from the metagenomic dataset. The column 'Mapping Error' shows the mapping error of the read to the current genome. Double click to see all genomes where this read was mapped.",
"unique read":"<b>Unique Read</b><br><br>A read mapping uniquely to the selected genome.",
"match":"<b>Matching Genome</b><br><br>Other genome that has shared reads with the selected genome. Double click to jump to the results of this genome."}

# HTML help texts for the pipeline settings pages, one entry per pipeline step.
# The list order follows the order of the steps in Pipeline.initUI
# (load data, raw filter, reference table, quality filter, candidates, visualize).
# NOTE(review): the lookup by step index happens outside this view -- confirm
# before reordering entries.
SettingsHelpTexts = [
# index 0: load data
"<b>Chose Input Directory</b><br><br>Select the data to be analyzed.<br><br>All SAM files in <i>Input Directory</i> are analyzed and treated as one dataset.",
# index 1: raw filter
"""<b>Apply Raw Filters</b><br><br>Several filters can be applied in this step to reduce the computational effort in the following steps.<br><br>
<i>Minimum Genome Support</i> discards all genomes with less than a specified number of reads.<br><br>
<i>Max. Read Matches</i> discards all reads matching to more than a specified number of genomes. These reads are considered as uninformative.<br><br>
<i>Max. Read Mapping Error</i> discards all read mappings with error above a specified value. The error is the fraction of not matching positions to the total read length.<br><br>
<i>Mapping Error Quantile</i> keeps the specified quantile of the best matching reads. Default: <i>1</i> keeps all reads.""",
# index 2: reference table
"""<b>Calculate Reference Table</b><br><br>The reference table represents the set of organisms for the GPS Analyzer. Each reference can represent several contigs/fragments of a single organism. Therefore, the sequence IDs in the SAM files can be mapped to arbitrary custom names (which are displayed in GPS Analyzer).<br><br>
<i>No Mapping</i> directly uses the sequence ID as reference name, i.e. the raw sequence IDs are displayed in the Analyzer.<br><br>
<i>Mapping File</i> specifies a Tab Separated Text-File, that maps sequence IDs to reference names. Note that multiple IDs may map to one reference name.<br><br><u>Example:</u><br><tt>
contig_1.0001[tab]Organism One<br>
contig_1.0002[tab]Organism One<br>
contig_2.0001[tab]Organism Two<br>
contig_2.0002[tab]Organism Two<br>
contig_3.0001[tab]Another One
</tt>""",
# index 3: quality filtering
"""<b>Quality Filtering</b><br><br>
<i>Min. Number Unique Reads</i> discards references with less than a specified number of unique reads (reads mapping uniquely to this organism, not contig/chromosome)<br><br>
<i>Max. Homogeneity:</i> The homogeneity of the read distribution over the genome is measured by comparing the read distribution to a uniform distribution using the Kolmogorov-Smirnov test statistic. Organisms with a test statistic higher than the specified value are discarded.""",
# index 4: candidate calculation
"""<b>Calculate Candidates</b><br><br>
First, a subset of reads is selected that is suitable for calculating the candidate organisms. Then, the references are grouped to candidates via shared reads in the selected subset.<br><br>
<i>Min. Genome Validity</i> discards all organisms below the specified validity threshold. The validity is the estimated fraction of the genome that could be covered by reads.<br><br>
<i>Coverage Similarity</i> sets the required relative coverage difference between all matches for a read.<br><br>
<i>Fraction Shared USR</i> sets for a reference the minimum required fraction of Unique Source Reads (USR) to be grouped with another reference.<br><br>
<i>Fraction Shared Reads</i> sets for a reference the minimum required fraction of reads to be grouped with another reference.""",
# index 5: visualization
"<b>Visualize Results</b><br><br>Send the calculated results to the GPS Analyzer. Optionally close this window after completing the analysis"
]


class Pipeline(QtGui.QWidget):
	def __init__(self,mainWindow):
		"""Create the pipeline widget and build its user interface.

		mainWindow is kept on the instance as ``self.mainWindow``.
		"""
		QtGui.QWidget.__init__(self)
		self.mainWindow = mainWindow
		self.initUI()

	def initUI(self):
		"""Build all widgets of the pipeline window and lay them out.

		Creates the list of pipeline steps, one settings page per step inside a
		QStackedWidget, the description box, the console with its progress bar
		and the two run buttons, then arranges everything in a grid layout.
		Also initialises the pipeline state (stepsDone, pipelineSteps,
		currentPipelineThread).
		"""
		# status icons shown next to each step in the step list
		self.redIcon = QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/red.png'))
		self.yellowIcon = QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/yellow.png'))
		self.greenIcon = QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/green.png'))
		self.errorIcon = QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/error.png'))
		self.openIcon = QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/open.png'))
		
		# index of the last successfully finished step; -1 means no step has run yet
		# (each step_* handler sets it to its own index on success)
		self.stepsDone = -1
		# step handlers in execution order; same order as the entries of
		# recipeList and the pages of settingsBox created below
		self.pipelineSteps = [self.step_load_data,
						  self.step_raw_filter,
						  self.step_reference_table,
						  self.step_quality_filter,
						  self.step_candidates,
						  self.step_visualize]
		self.currentPipelineThread = None
		
		recipeLbl = QtGui.QLabel('Pipeline')
		settingsLbl = QtGui.QLabel('Settings')

		# list of pipeline steps; every step starts with the red icon
		self.recipeList = QtGui.QListWidget()
		itemNames = ['Load Data', 'Raw Filter', 'Calculate Reference Table', 'Quality Filtering', 'Calculate Candidates', 'Visualize Results']	
		for name in itemNames:
			item = QtGui.QListWidgetItem(self.redIcon,name)
			item.setFlags(QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable)
			self.recipeList.addItem(item)
		self.recipeList.setMinimumWidth(230)
		self.recipeList.setIconSize(QtCore.QSize(24,24))
		# recipeStepSelected is defined elsewhere in this class
		self.recipeList.currentItemChanged.connect(self.recipeStepSelected)
		

		# create all settings pages
		self.settingsBox = QtGui.QStackedWidget()

		# page 0: load data
		self.fileNameEdit = QtGui.QLineEdit()
		selectButton = QtGui.QPushButton(self.openIcon,'')
		selectButton.clicked.connect(self.selectDirectory)
		selectButton.setToolTip('Open directory containing SAM files')
		# line edit and "open" button side by side
		sublayout = QtGui.QHBoxLayout()
		sublayout.addWidget(self.fileNameEdit,stretch=4)
		sublayout.addWidget(selectButton,stretch=1)
		subwidget = QtGui.QWidget()
		subwidget.setLayout(sublayout)
		layout = QtGui.QGridLayout()
		layout.setAlignment(QtCore.Qt.AlignTop | QtCore.Qt.AlignLeft)
		layout.addWidget(QtGui.QLabel('Input Directory'),0,0)
		layout.addWidget(subwidget,0,1)
		loadDataWidget = QtGui.QWidget()
		loadDataWidget.setLayout(layout)
		self.settingsBox.addWidget(loadDataWidget)

		# page 1: Raw Filter
		# every value change is reported via modifiedSettings(<step index>)
		# NOTE(review): modifiedSettings is defined outside this view -- presumably
		# it marks this and the following steps as outdated; confirm.
		self.minSupportBox = QtGui.QSpinBox()
		self.minSupportBox.setRange(1,1000000000)
		self.minSupportBox.setValue(50)
		self.minSupportBox.setMinimumWidth(100)
		self.minSupportBox.valueChanged.connect(lambda: self.modifiedSettings(1))
		self.maxMatchesBox = QtGui.QSpinBox()
		self.maxMatchesBox.setRange(2,1000000000)
		self.maxMatchesBox.setValue(80)
		self.maxMatchesBox.setMinimumWidth(100)
		self.maxMatchesBox.valueChanged.connect(lambda: self.modifiedSettings(1))
		self.maxMappingError = QtGui.QDoubleSpinBox()
		self.maxMappingError.setDecimals(2)
		self.maxMappingError.setRange(0,1)
		self.maxMappingError.setValue(1)
		self.maxMappingError.setSingleStep(0.05)
		self.maxMappingError.setMinimumWidth(100)
		self.maxMappingError.valueChanged.connect(lambda: self.modifiedSettings(1))
		self.mappingErrorQuantile = QtGui.QDoubleSpinBox()
		self.mappingErrorQuantile.setDecimals(2)
		self.mappingErrorQuantile.setRange(0,1)
		self.mappingErrorQuantile.setValue(1)
		self.mappingErrorQuantile.setSingleStep(0.05)
		self.mappingErrorQuantile.setMinimumWidth(100)
		self.mappingErrorQuantile.valueChanged.connect(lambda: self.modifiedSettings(1))
		layout = QtGui.QFormLayout()
		layout.setFormAlignment(QtCore.Qt.AlignTop | QtCore.Qt.AlignLeft)
		layout.addRow(QtGui.QLabel('Minimum Genome Support'),self.minSupportBox)
		layout.addRow(QtGui.QLabel('Max. Read Matches'),self.maxMatchesBox)
		layout.addRow(QtGui.QLabel('Max. Read Mapping Error'),self.maxMappingError)
		layout.addRow(QtGui.QLabel('Mapping Error Quantile'),self.mappingErrorQuantile)
		rawFilterWidget = QtGui.QWidget()
		rawFilterWidget.setLayout(layout)
		self.settingsBox.addWidget(rawFilterWidget)
		
		# page 2: Calculate Reference Table
		# three radio buttons select the naming source; 'NCBI catalog' is the default
		self.useIdMapping = QtGui.QRadioButton('No Mapping')
		self.useIdMapping.toggled.connect(lambda: self.modifiedSettings(2))
		self.useMappingFile = QtGui.QRadioButton('Mapping File')
		self.useMappingFile.toggled.connect(lambda: self.modifiedSettings(2))
		self.mappingFileEdit = QtGui.QLineEdit()
		self.useNCBIFiles = QtGui.QRadioButton('NCBI catalog')
		self.useNCBIFiles.setChecked(True)
		self.useNCBIFiles.toggled.connect(lambda: self.modifiedSettings(2))
		# '<default>' (or an empty field) makes RefTableThread use the catalog
		# bundled with the package
		self.NCBICatalogPath = QtGui.QLineEdit('<default>')
		self.NCBICatalogPath.setToolTip('Clear this field to use the default catalog.')
		selectButton1 = QtGui.QPushButton(self.openIcon,'')
		selectButton1.clicked.connect(self.selectMappingFile)
		selectButton1.setToolTip('Load Mapping File')
		selectButton2 = QtGui.QPushButton(self.openIcon,'')
		selectButton2.clicked.connect(self.selectNCBICatalog)
		selectButton2.setToolTip('Load NCBI catalog file')
		sublayout1 = QtGui.QHBoxLayout()
		sublayout1.addWidget(self.mappingFileEdit,stretch=4)
		sublayout1.addWidget(selectButton1,stretch=1)
		subwidget1 = QtGui.QWidget()
		subwidget1.setLayout(sublayout1)
		sublayout2 = QtGui.QHBoxLayout()
		sublayout2.addWidget(self.NCBICatalogPath,stretch=4)
		sublayout2.addWidget(selectButton2,stretch=1)
		subwidget2 = QtGui.QWidget()
		subwidget2.setLayout(sublayout2)
		layout = QtGui.QGridLayout()
		layout.setAlignment(QtCore.Qt.AlignTop | QtCore.Qt.AlignLeft)
		layout.addWidget(self.useIdMapping,0,0)
		layout.addWidget(self.useMappingFile,1,0)
		layout.addWidget(subwidget1,1,1)
		layout.addWidget(self.useNCBIFiles,2,0)
		layout.addWidget(subwidget2,2,1)
		referenceTableWidget = QtGui.QWidget()
		referenceTableWidget.setLayout(layout)
		self.settingsBox.addWidget(referenceTableWidget)
		
		# page 3: Quality Filtering
		self.minAbsoluteUnique = QtGui.QSpinBox()
		self.minAbsoluteUnique.setRange(0,1000000000)
		self.minAbsoluteUnique.setValue(20)
		self.minAbsoluteUnique.setMinimumWidth(100)
		self.minAbsoluteUnique.valueChanged.connect(lambda: self.modifiedSettings(3))
		self.maxHomogeneity = QtGui.QDoubleSpinBox()
		self.maxHomogeneity.setDecimals(3)
		self.maxHomogeneity.setValue(0.6)
		self.maxHomogeneity.setRange(0.,1.)
		self.maxHomogeneity.setSingleStep(0.05)
		self.maxHomogeneity.setMinimumWidth(100)
		self.maxHomogeneity.valueChanged.connect(lambda: self.modifiedSettings(3))
		# created here, but configured and placed on page 4 (see below)
		self.minValidity = QtGui.QDoubleSpinBox()
		layout = QtGui.QFormLayout()
		layout.setFormAlignment(QtCore.Qt.AlignTop | QtCore.Qt.AlignLeft)
		layout.addRow(QtGui.QLabel('Min. Number Unique Reads'),self.minAbsoluteUnique)
		layout.addRow(QtGui.QLabel('Max. Homogeneity'),self.maxHomogeneity)

		qualFilterWidget = QtGui.QWidget()
		qualFilterWidget.setLayout(layout)
		self.settingsBox.addWidget(qualFilterWidget)
		
		# page 4: Calculate Candidates
		self.minValidity.setDecimals(3)
		self.minValidity.setValue(0.001)
		self.minValidity.setRange(0.,1.)
		self.minValidity.setSingleStep(0.05)
		self.minValidity.setMinimumWidth(100)
		self.minValidity.valueChanged.connect(lambda: self.modifiedSettings(4))
		# only a lower bound is set for the coverage similarity
		self.coverageSimilarity = QtGui.QDoubleSpinBox()
		self.coverageSimilarity.setDecimals(2)
		self.coverageSimilarity.setValue(0.2)
		self.coverageSimilarity.setMinimum(0)
		self.coverageSimilarity.setSingleStep(0.05)
		self.coverageSimilarity.setMinimumWidth(100)
		self.coverageSimilarity.valueChanged.connect(lambda: self.modifiedSettings(4))
		self.sharedFractionUSR = QtGui.QDoubleSpinBox()
		self.sharedFractionUSR.setDecimals(2)
		self.sharedFractionUSR.setValue(0.2)	
		self.sharedFractionUSR.setRange(0.,1.)
		self.sharedFractionUSR.setSingleStep(0.05)
		self.sharedFractionUSR.setMinimumWidth(100)
		self.sharedFractionUSR.valueChanged.connect(lambda: self.modifiedSettings(4))
		self.sharedFractionAll = QtGui.QDoubleSpinBox()
		self.sharedFractionAll.setDecimals(2)
		self.sharedFractionAll.setValue(0.6)	
		self.sharedFractionAll.setRange(0.,1.)
		self.sharedFractionAll.setSingleStep(0.05)
		self.sharedFractionAll.setMinimumWidth(100)
		self.sharedFractionAll.valueChanged.connect(lambda: self.modifiedSettings(4))
		layout = QtGui.QFormLayout()
		layout.setFormAlignment(QtCore.Qt.AlignTop | QtCore.Qt.AlignLeft)
		layout.addRow(QtGui.QLabel('Min. Genome Validity'),self.minValidity)
		layout.addRow(QtGui.QLabel('Coverage Similarity'),self.coverageSimilarity)
		layout.addRow(QtGui.QLabel('Fraction Shared USR'),self.sharedFractionUSR)
		layout.addRow(QtGui.QLabel('Fraction Shared Reads'),self.sharedFractionAll)
		extractUSRWidget = QtGui.QWidget()
		extractUSRWidget.setLayout(layout)
		self.settingsBox.addWidget(extractUSRWidget)
		
		# page 5: Visualize
		self.closeOnFinish = QtGui.QCheckBox('Close this window after visualizing results')
		layout = QtGui.QVBoxLayout()
		layout.addWidget(self.closeOnFinish)
		visualizeWidget = QtGui.QWidget()
		visualizeWidget.setLayout(layout)
		self.settingsBox.addWidget(visualizeWidget)		
		
		# Text boxes
		self.descriptionText = QtGui.QTextEdit()
		self.descriptionText.setReadOnly(True)
		
		# console output uses a font one point smaller than the widget font
		self.consoleText = QtGui.QTextEdit()
		self.consoleText.setReadOnly(True)
		consoleFont = self.consoleText.font()
		consoleFont.setPointSize(self.font().pointSize()-1)
		self.consoleText.setFont(consoleFont)
		
		# Progress bar
		self.progress = QtGui.QProgressBar()
		
		# progress bar stacked on top of the console
		consoleLayout = QtGui.QVBoxLayout()
		consoleLayout.addWidget(self.progress)
		consoleLayout.addWidget(self.consoleText)
		consoleWidget = QtGui.QWidget()
		consoleWidget.setLayout(consoleLayout)

		# run buttons; runOneStep and runPipeline are defined elsewhere in this class
		self.runStepBtn = QtGui.QPushButton("Run Step")
		self.runStepBtn.clicked.connect(self.runOneStep)
		self.runPipelineBtn = QtGui.QPushButton("Run Pipeline")
		self.runPipelineBtn.clicked.connect(self.runPipeline)
		buttonLayout = QtGui.QHBoxLayout()
		buttonLayout.addWidget(self.runStepBtn)
		buttonLayout.addWidget(self.runPipelineBtn)
		buttonBar = QtGui.QWidget()
		buttonBar.setLayout(buttonLayout)
		
		# overall layout: two equally stretched columns
		#   row 0: labels, row 1: step list | settings pages,
		#   row 2: description | console, row 3: button bar
		gridLayout = QtGui.QGridLayout()
		gridLayout.setColumnStretch(0,2)
		gridLayout.setColumnStretch(1,2)
		gridLayout.addWidget(recipeLbl,0,0)
		gridLayout.addWidget(settingsLbl,0,1)
		gridLayout.addWidget(self.recipeList,1,0)
		gridLayout.addWidget(self.settingsBox,1,1)
		gridLayout.addWidget(self.descriptionText,2,0)
		gridLayout.addWidget(consoleWidget,2,1)
		# the button bar spans both columns
		gridLayout.addWidget(buttonBar,3,0,1,2)
		self.setLayout(gridLayout)
		
		self.setMinimumHeight(600)
		self.setMinimumWidth(400)
		self.setWindowTitle('GPS Pipeline')
	
	class LoadDataThread(QtCore.QThread):
		"""Worker thread for pipeline step 1: read the SAM files.

		Signals:
		  done(dict, dict) -- emitted with (target_table, read_table) on success
		  failed(str)      -- emitted with the error text if reading raised
		  status(int, int) -- progress as (current, total), forwarded from the reader
		"""
		done   = QtCore.pyqtSignal(dict,dict)
		failed = QtCore.pyqtSignal(str)
		status = QtCore.pyqtSignal(int,int)
		def __init__(self, parent):
			QtCore.QThread.__init__(self, parent)
		def run(self):
			"""Read all SAM files selected in the parent's input field."""
			p = self.parent()
			# NOTE(review): p.pr is called from the worker thread here; confirm
			# that it is safe to call outside the GUI thread.
			p.pr('<b>Started loading data.</b><br>--- This may take a while.')
			directory = str(p.fileNameEdit.text())
			# anything that is not a single .sam file is treated as a directory
			# and turned into a '*.sam' pattern
			if not directory.endswith('.sam'):
				if directory.endswith(('/','\\')):
					directory += '*.sam'
				else:
					directory += '/*.sam'
			def status_reporter(n,total):
				self.status.emit(n,total)
			try:
				target_table,read_table = gps.read_sam_files_no_pysam(directory,status_reporter)
			except Exception as e:
				# e.message is empty for exceptions built from several arguments
				# (e.g. IOError(errno, text)); fall back to str(e) so the user
				# always gets a meaningful error text.
				self.failed.emit(getattr(e, 'message', '') or str(e))
				raise
			else:
				self.done.emit(target_table,read_table)
	
	def step_load_data(self):
		"""Start pipeline step 1 (load data) in a background thread.

		On success the loaded tables are stored in self.target_table and
		self.read_table, stepsDone is set to 0 and the step icon turns green;
		on failure the error is printed and the error icon is shown.
		Returns the started LoadDataThread.
		"""
		worker = self.LoadDataThread(self)

		def on_done(target_table,read_table):
			# keep the tables for the following pipeline steps
			self.target_table = target_table
			self.read_table = read_table
			self.stepsDone = 0
			counts = (len(read_table),len(target_table))
			self.pr('---Found %i reads mapping to %i targets.<br><b>Finished Loading Data.</b>'%counts)
			self.recipeList.item(0).setIcon(self.greenIcon)

		def on_failed(text):
			report = '<b><font color="DarkRed">Error in step 1!</font></b><br>Message: <i>'+text+'</i>'
			self.pr(report)
			self.recipeList.item(0).setIcon(self.errorIcon)

		def on_status(pos,total):
			bar = self.progress
			bar.setMaximum(total)
			bar.setValue(pos)

		# wire the worker's signals to the handlers above
		for signal,handler in ((worker.done,on_done),(worker.failed,on_failed),(worker.status,on_status)):
			signal.connect(handler)
		worker.start()
		return worker


	class FilterRawThread(QtCore.QThread):
		"""Worker thread for pipeline step 2: raw filtering of targets and reads.

		Signals:
		  done()      -- emitted when filtering and the mapping statistics finished
		  failed(str) -- emitted with the error text if anything raised
		  status(str) -- text messages produced by gps.filter_raw
		"""
		done   = QtCore.pyqtSignal()
		failed = QtCore.pyqtSignal(str)
		status = QtCore.pyqtSignal(str)
		def __init__(self, parent):
			QtCore.QThread.__init__(self, parent)
		def run(self):
			"""Apply the raw filters using the parent's page-1 settings."""
			def printer(text):
				# forward messages through a signal instead of touching the GUI directly
				self.status.emit(text)
			try:
				p = self.parent()
				p.pr('<b>Started filtering data.</b>')
				gps.filter_raw(p.target_table,p.read_table,
						max_matches=p.maxMatchesBox.value(),
						min_support = p.minSupportBox.value(),
						max_error = p.maxMappingError.value(),
						qual_percentile = p.mappingErrorQuantile.value(),
						pr=printer)
				gps.calculate_mapping_statistics(p.target_table)
			except Exception as e:
				# e.message is empty for exceptions built from several arguments;
				# fall back to str(e) so the error text is never lost.
				self.failed.emit(getattr(e, 'message', '') or str(e))
				raise
			else:
				self.done.emit()
	
	def step_raw_filter(self):
		"""Start pipeline step 2 (raw filtering) in a background thread.

		Requires that step 1 (load data) has finished (stepsDone >= 0).
		Returns the started FilterRawThread, or None if the precondition is
		not met; in that case the error is printed and the step is marked
		with the error icon.
		"""
		thr = self.FilterRawThread(self)
		def finished_successful():
			self.pr('<b>Finished Filtering Raw Data.</b>')
			self.stepsDone = 1
			self.recipeList.item(1).setIcon(self.greenIcon)
		def finished_error(text):
			self.pr('<b><font color="DarkRed">Error in step 2!</font></b><br>Message: <i>'+text+'</i>')
			self.recipeList.item(1).setIcon(self.errorIcon)
		thr.done.connect(finished_successful)
		thr.failed.connect(finished_error)
		thr.status.connect(self.pr)
		
		if not self.stepsDone >= 0:
			self.pr('Error running step 2 (Raw Filtering): Need to load data first!')
			# 'failed' is declared as pyqtSignal(str) and must be emitted with
			# exactly one string argument; emitting without arguments raises a
			# TypeError instead of reporting the error.
			thr.failed.emit('')
			return None
			
		thr.start()
		return thr
		
		
	class RefTableThread(QtCore.QThread):
		"""Worker thread for pipeline step 3: build the reference table.

		Signals:
		  done(dict)  -- emitted with the computed reference table
		  failed(str) -- emitted with the error text if anything raised
		  status(str) -- declared for text messages; not emitted by run(),
		                 which passes the parent's pr method to gps instead
		"""
		done   = QtCore.pyqtSignal(dict)
		failed = QtCore.pyqtSignal(str)
		status = QtCore.pyqtSignal(str)
		def __init__(self, parent):
			QtCore.QThread.__init__(self, parent)
		def run(self):
			"""Compute the reference table according to the parent's page-2 settings.

			Depending on the selected radio button the names come from a mapping
			file, from an NCBI catalog (bundled default, plain file or .gz file),
			or directly from the sequence IDs.
			"""
			try:
				p = self.parent()
				p.pr('<b>Calculating Reference Table.</b>')
				if p.useMappingFile.isChecked():
					ref_table = gps.get_reference_table(p.target_table,p.read_table,str(p.mappingFileEdit.text()))
				elif p.useNCBIFiles.isChecked():
					# read the widget once instead of once per comparison
					catalog_path = str(p.NCBICatalogPath.text())
					if catalog_path == '' or catalog_path == '<default>':
						# empty field or placeholder: use the catalog shipped with the package
						catalog = pkg_resources.resource_stream('microbegps','data/taxonomy/bact.catalog')
					elif catalog_path.endswith('.gz'):
						import gzip
						catalog = gzip.open(catalog_path,'r')
					else:
						catalog = open(catalog_path,'r')
					try:
						ref_table = gps.get_reference_table_NCBI(p.target_table,p.read_table, catalog, p.pr)
					finally:
						# always release the catalog file handle, also on errors
						catalog.close()
				else:
					ref_table = gps.get_reference_table(p.target_table,p.read_table)
			except Exception as e:
				# e.message is empty for exceptions built from several arguments
				# (e.g. IOError for a missing catalog file); fall back to str(e).
				self.failed.emit(getattr(e, 'message', '') or str(e))
				raise
			else:
				self.done.emit(ref_table)
	
	def step_reference_table(self):
		"""Start pipeline step 3 (reference table calculation) in a background thread.

		Requires that steps 1 and 2 have finished (stepsDone >= 1); otherwise an
		error is printed, the step gets the error icon and None is returned.
		On success the table is stored in self.ref_table and stepsDone is set
		to 2. Returns the started RefTableThread.
		"""
		if self.stepsDone < 1:
			self.pr('Error running step 3 (Reference Table Calculation): Run the previous steps first!')
			self.recipeList.item(2).setIcon(self.errorIcon)
			return None

		worker = self.RefTableThread(self)

		def on_done(ref_table):
			self.pr('<b>Finished Reference Table calculation.</b>')
			self.ref_table = ref_table
			self.stepsDone = 2
			self.recipeList.item(2).setIcon(self.greenIcon)

		def on_failed(text):
			report = '<b><font color="DarkRed">Error in step 3!</font></b><br>Message: <i>'+text+'</i>'
			self.pr(report)
			self.recipeList.item(2).setIcon(self.errorIcon)

		# wire the worker's signals, then launch it
		for signal,handler in ((worker.done,on_done),(worker.failed,on_failed),(worker.status,self.pr)):
			signal.connect(handler)
		worker.start()
		return worker


	class FilterQualityThread(QtCore.QThread):
		""" Worker thread for pipeline step 4 (Quality Filtering).

		Signals:
		done()      -- emitted when the filtering finished successfully
		failed(str) -- emitted with the error message if the filtering raised
		status(str) -- console messages; step_quality_filter connects this
		               signal to the console of the pipeline window
		"""
		done   = QtCore.pyqtSignal()
		failed = QtCore.pyqtSignal(str)
		status = QtCore.pyqtSignal(str)
		def __init__(self, parent):
			QtCore.QThread.__init__(self, parent)
		def run(self):
			""" Filter the reference table of the parent window: a reference is
			kept if it has at least the required number of unique reads and its
			coverage homogeneity does not exceed the configured maximum.
			Exceptions are reported via `failed` and re-raised. """
			def printer(text):
				# all console output goes through the signal, so that the console
				# widget is not modified by this worker thread
				self.status.emit(text)
			try:
				p = self.parent()
				printer('<b>Started filtering data.</b>')
				# read the thresholds once, so all references see the same values
				min_unique = p.minAbsoluteUnique.value()
				max_homogeneity = p.maxHomogeneity.value()
				def custom_filt1(ref):
					return ref.unique >= min_unique and ref.cov_homog <= max_homogeneity
				gps.filter_ref_table(p.ref_table,p.read_table,custom_filt1,printer)
			except Exception as e:
				# str(e) instead of e.message: the latter is empty for exceptions
				# with several arguments
				self.failed.emit(str(e))
				raise
			else:
				self.done.emit()

	def step_quality_filter(self):
		""" Run step 4 of the pipeline (Quality Filtering) in a background
		thread. The thread object is returned in any case, but it is only
		started if the previous steps have been completed. """
		def on_success():
			self.pr('<b>Finished Quality Filtering.</b>')
			self.stepsDone = 3
			self.recipeList.item(3).setIcon(self.greenIcon)

		def on_failure(text):
			self.pr('<b><font color="DarkRed">Error in step 4!</font></b><br>Message: <i>'+text+'</i>')
			self.recipeList.item(3).setIcon(self.errorIcon)

		worker = self.FilterQualityThread(self)
		worker.done.connect(on_success)
		worker.failed.connect(on_failure)
		worker.status.connect(self.pr)

		if self.stepsDone >= 2:
			worker.start()
		else:
			self.pr('Error running step 4 (Quality Filtering): Run the previous steps first!')
			# report the problem through the regular error handler of this step
			worker.failed.emit('')
		return worker
		

	class CalcCandidatesThread(QtCore.QThread):
		""" Worker thread for pipeline step 5 (Calculate Candidates).

		Signals:
		done(list)      -- emitted with the list of candidates on success
		failed(str)     -- emitted with the error message if the calculation raised
		status(int,int) -- progress information (not emitted by run() itself)
		console(str)    -- console messages; step_candidates connects this
		                   signal to the console of the pipeline window
		"""
		done   = QtCore.pyqtSignal(list)
		failed = QtCore.pyqtSignal(str)
		status = QtCore.pyqtSignal(int,int)
		console = QtCore.pyqtSignal(str)
		def __init__(self, parent):
			QtCore.QThread.__init__(self, parent)
		def run(self):
			""" Calculate validity and coverage of all references, filter the
			reference table, extract the USR table and group the references into
			candidates. Exceptions are reported via `failed` and re-raised. """
			def printer(text):
				# all console output goes through the signal, so that the console
				# widget is not modified by this worker thread
				self.console.emit(text)
			try:
				p = self.parent()
				printer('<b>Started calculating candidates.</b>')

				from multiprocessing.pool import ThreadPool
				pool = ThreadPool(5)
				try:
					pool.map(gps.calculate_valcov_one,p.ref_table.values())
				finally:
					# release the pool threads, also if one of the calculations failed
					pool.close()
					pool.join()

				# read the threshold once, so all references see the same value
				min_validity = p.minValidity.value()
				def custom_filt2(ref):
					return ref.validity >= min_validity and ref.coverage > 0
				gps.filter_ref_table(p.ref_table,p.read_table,custom_filt2,printer)
				usr_table = gps.extract_USR(p.ref_table,p.read_table,p.coverageSimilarity.value(),printer)
				usr_mat,n2i,i2n = gps.get_read_matrix(usr_table,p.ref_table)
				all_mat,n2ia,_ = gps.get_read_matrix(p.read_table,p.ref_table)
				# bring the columns of all_mat into the column order of usr_mat
				resort = [n2ia[i2n[i]] for i in range(len(n2i))]
				all_mat = all_mat[:,resort]
				group_dict = gps.create_groups_dc(usr_mat,all_mat,n2i,p.sharedFractionUSR.value(),status=printer)
				candidates = gps.enrich_groups(group_dict,p.ref_table,p.read_table,i2n)
			except Exception as e:
				# str(e) instead of e.message: the latter is empty for exceptions
				# with several arguments
				self.failed.emit(str(e))
				raise
			else:
				self.done.emit(candidates)
	
	def step_candidates(self):
		""" Start step 5 of the pipeline (Calculate Candidates) in a background
		thread. Returns the started thread, or None if the previous steps have
		not been completed yet. """
		if self.stepsDone < 3:
			self.pr('Error running step 5 (Calculate Candidates): Run the previous steps first!')
			self.recipeList.item(4).setIcon(self.errorIcon)
			return None

		def on_success(candidates):
			# keep the result and mark the step as completed
			self.pr('--- Found %i candidates.<br><b>Finished candidate list</b>.'%(len(candidates)))
			self.candidates = candidates
			self.stepsDone = 4
			self.recipeList.item(4).setIcon(self.greenIcon)

		def on_failure(text):
			self.pr('<b><font color="DarkRed">Error in step 5!</font></b><br>Message: <i>'+text+'</i>')
			self.recipeList.item(4).setIcon(self.errorIcon)

		def on_progress(pos,total):
			self.progress.setMaximum(total)
			self.progress.setValue(pos)

		worker = self.CalcCandidatesThread(self)
		worker.done.connect(on_success)
		worker.failed.connect(on_failure)
		worker.status.connect(on_progress)
		worker.console.connect(self.pr)
		worker.start()
		return worker

	class VisualizeResultsThread(QtCore.QThread):
		""" Worker thread for pipeline step 6 (Visualize Results).

		Signals:
		done()      -- emitted when the results were handed to the main window
		failed(str) -- emitted with the error message if the visualization raised
		status(str) -- console messages (not emitted by run() itself)
		"""
		done   = QtCore.pyqtSignal()
		failed = QtCore.pyqtSignal(str)
		status = QtCore.pyqtSignal(str)
		def __init__(self, parent):
			QtCore.QThread.__init__(self, parent)
		def run(self):
			""" Pass candidates, reference table and read table of the parent to
			the main window and store the pipeline settings there. Exceptions are
			reported via `failed` and re-raised. """
			# NOTE(review): visualize() is executed in this worker thread; if it
			# modifies widgets of the main window, this should be moved to the
			# GUI thread -- confirm against the Qt threading rules.
			try:
				p = self.parent()
				p.mainWindow.visualize(p.candidates,p.ref_table,p.read_table)
				p.mainWindow.settings_pipeline = p.get_settings()
			except Exception as e:
				# str(e) instead of e.message: the latter is empty for exceptions
				# with several arguments
				self.failed.emit(str(e))
				raise
			else:
				self.done.emit()

	def step_visualize(self):
		""" Start step 6 of the pipeline (Visualize Results) in a background
		thread. Returns the started thread, or None if the previous steps have
		not been completed yet. """
		if self.stepsDone < 4:
			self.pr('Error running step 6 (Visualize Results): Run the previous steps first!')
			self.recipeList.item(5).setIcon(self.errorIcon)
			return None

		def on_success():
			self.pr('<b>Visualized Results in Analyzer.</b>')
			self.stepsDone = 5
			self.recipeList.item(5).setIcon(self.greenIcon)
			# optionally close the pipeline window after the last step
			if self.closeOnFinish.isChecked():
				self.close()

		def on_failure(text):
			self.pr('<b><font color="DarkRed">Error in step 6!</font></b><br>Message: <i>'+text+'</i>')
			self.recipeList.item(5).setIcon(self.errorIcon)

		worker = self.VisualizeResultsThread(self)
		worker.done.connect(on_success)
		worker.failed.connect(on_failure)
		worker.status.connect(self.pr)
		worker.start()
		return worker
		

	class RunAllThread(QtCore.QThread):
		""" Worker thread running all six steps of the pipeline in one go.

		Signals:
		done(int)       -- index (0-5) of a step that finished successfully
		failed(int,str) -- index of the step that raised and the error message
		step(int)       -- index of the step that is started now
		status(int,int) -- progress (position, total) while loading the data
		console(str)    -- console messages, delivered to the pr method of the parent
		"""
		done   = QtCore.pyqtSignal(int)
		failed = QtCore.pyqtSignal(int,str)
		step = QtCore.pyqtSignal(int) # current step in the pipeline
		status = QtCore.pyqtSignal(int,int) # the status in the status bar
		console = QtCore.pyqtSignal(str) # text for the console of the parent
		def __init__(self, parent):
			QtCore.QThread.__init__(self, parent)
			# Console output is sent through a signal instead of calling
			# parent.pr directly in run(), so that the console widget is not
			# modified by the worker thread.
			self.console.connect(parent.pr)
		def run(self):
			""" Run the whole pipeline at once. The first exception is reported
			via `failed` (together with the index of the step) and re-raised. """
			import time
			t_start = time.time()

			p = self.parent()
			printer = self.console.emit

			########## LOAD DATA ##########
			printer('<b>Started loading data.</b><br>--- This may take a while.')
			self.step.emit(0)
			directory = str(p.fileNameEdit.text())
			if not directory.endswith('.sam'):
				# a directory was given: use all SAM files it contains
				if directory.endswith(('/','\\')):
					directory += '*.sam'
				else:
					directory += '/*.sam'
			try:
				target_table,read_table = gps.read_sam_files_no_pysam(directory,self.status.emit)
			except Exception as e:
				# str(e) instead of e.message: the latter is empty for exceptions
				# with several arguments (e.g. IOError for a missing file)
				self.failed.emit(0,str(e))
				raise
			else:
				self.done.emit(0)
			printer('---Found %i reads mapping to %i targets.<br><b>Finished Loading Data.</b>'%(len(read_table),len(target_table)))

			########## RAW FILTERING ##########
			printer('<b>Started filtering data.</b>')
			self.step.emit(1)
			try:
				gps.filter_raw(target_table,read_table,
						max_matches=p.maxMatchesBox.value(),
						min_support = p.minSupportBox.value(),
						max_error = p.maxMappingError.value(),
						qual_percentile = p.mappingErrorQuantile.value(),
						pr=printer)
				gps.calculate_mapping_statistics(target_table)
			except Exception as e:
				self.failed.emit(1,str(e))
				raise
			else:
				self.done.emit(1)

			########## CALCULATE REFERENCE TABLE ##########
			printer('<b>Calculating Reference Table.</b>')
			self.step.emit(2)
			catalog = None
			try:
				if p.useMappingFile.isChecked():
					ref_table = gps.get_reference_table(target_table,read_table,str(p.mappingFileEdit.text()))
				elif p.useNCBIFiles.isChecked():
					catalog_path = str(p.NCBICatalogPath.text())
					if catalog_path == '' or catalog_path == '<default>':
						# no file selected: use the catalog shipped with the package
						catalog = pkg_resources.resource_stream('microbegps','data/taxonomy/bact.catalog')
					elif catalog_path.endswith('.gz'):
						import gzip
						catalog = gzip.open(catalog_path,'r')
					else:
						catalog = open(catalog_path,'r')
					ref_table = gps.get_reference_table_NCBI(target_table,read_table, catalog, printer)
				else:
					ref_table = gps.get_reference_table(target_table,read_table)
			except Exception as e:
				self.failed.emit(2,str(e))
				raise
			else:
				self.done.emit(2)
			finally:
				if catalog is not None:
					catalog.close()

			########## QUALITY FILTERING ##########
			printer('<b>Started filtering data.</b>')
			self.step.emit(3)
			try:
				min_unique = p.minAbsoluteUnique.value()
				max_homogeneity = p.maxHomogeneity.value()
				def custom_filt1(ref):
					return ref.unique >= min_unique and ref.cov_homog <= max_homogeneity
				gps.filter_ref_table(ref_table,read_table,custom_filt1,printer)
			except Exception as e:
				self.failed.emit(3,str(e))
				raise
			else:
				self.done.emit(3)

			########## CALCULATE CANDIDATES ##########
			printer('<b>Started calculating candidates.</b>')
			self.step.emit(4)
			try:
				from multiprocessing.pool import ThreadPool
				pool = ThreadPool(5)
				try:
					pool.map(gps.calculate_valcov_one,ref_table.values())
				finally:
					# release the pool threads, also if one of the calculations failed
					pool.close()
					pool.join()
				min_validity = p.minValidity.value()
				def custom_filt2(ref):
					return ref.validity >= min_validity and ref.coverage > 0
				gps.filter_ref_table(ref_table,read_table,custom_filt2,printer)
				usr_table = gps.extract_USR(ref_table,read_table,p.coverageSimilarity.value(),printer)
				usr_mat,n2i,i2n = gps.get_read_matrix(usr_table,ref_table)
				all_mat,n2ia,_ = gps.get_read_matrix(read_table,ref_table)
				# bring the columns of all_mat into the column order of usr_mat
				resort = [n2ia[i2n[i]] for i in range(len(n2i))]
				all_mat = all_mat[:,resort]
				group_dict = gps.create_groups_dc(usr_mat,all_mat,n2i,p.sharedFractionUSR.value(),status=printer)
				candidates = gps.enrich_groups(group_dict,ref_table,read_table,i2n)
			except Exception as e:
				self.failed.emit(4,str(e))
				raise
			else:
				self.done.emit(4)
			printer('--- Found %i candidates.<br><b>Finished candidate list</b>.'%(len(candidates)))

			########## VISUALIZE RESULTS ##########
			# NOTE(review): visualize() is executed in this worker thread; if it
			# modifies widgets of the main window, this should be moved to the
			# GUI thread -- confirm against the Qt threading rules.
			self.step.emit(5)
			try:
				p.mainWindow.visualize(candidates,ref_table,read_table)
				p.mainWindow.settings_pipeline = p.get_settings()
			except Exception as e:
				self.failed.emit(5,str(e))
				raise
			else:
				self.done.emit(5)
				printer('<b>Finished.</b>')
			# report the elapsed time in full minutes and seconds
			t_min,t_sec = divmod(int(time.time() - t_start),60)
			printer('calculation took %i min %i sec'%(t_min,t_sec))

	def pr(self, text):
		""" Append text to the console of this window and scroll the console
		down to the newest entry """
		console = self.consoleText
		console.append(text)
		scrollbar = console.verticalScrollBar()
		scrollbar.setValue(scrollbar.maximum())
		
		
	def runOneStep(self):
		""" Run the step of the pipeline that is currently selected in the
		recipe list. All previous steps must be completed before (i.e. have a
		green light). """
		selected = self.recipeList.currentRow()
		# the results of all later steps become outdated
		for later in range(selected+1,6):
			self.recipeList.item(later).setIcon(self.redIcon)
		# mark the selected step as running and start it
		self.recipeList.item(selected).setIcon(self.yellowIcon)
		run_step = self.pipelineSteps[selected]
		run_step()


	def runPipeline(self):
		""" Run the whole pipeline from start to end in a background thread.
		Returns the started thread. """
		thr = self.RunAllThread(self)
		def finished_step(step):
			self.recipeList.item(step).setIcon(self.greenIcon)
			self.stepsDone = step
		def started_step(step):
			self.recipeList.item(step).setIcon(self.yellowIcon)
		def finished_error(step,text):
			# the thread reports the 0-based index of the step; the messages of
			# the single steps count from 1 (index 2 is 'step 3'), so do the same
			self.pr('<b><font color="DarkRed">Error in step '+str(step+1)+'!</font></b><br>Message: <i>'+text+'</i>')
			self.recipeList.item(step).setIcon(self.errorIcon)
		def update_progress_bar(pos,total):
			self.progress.setMaximum(total)
			self.progress.setValue(pos)
		# no step has been run yet
		for i in range(6):
			self.recipeList.item(i).setIcon(self.redIcon)
		thr.done.connect(finished_step)
		thr.step.connect(started_step)
		thr.failed.connect(finished_error)
		thr.status.connect(update_progress_bar)
		thr.start()
		return thr
			
	def recipeStepSelected(self, current, previous):
		""" Called after a different step was selected in the recipeList:
		shows the settings page and the help text belonging to that step """
		row = self.recipeList.currentRow()
		self.settingsBox.setCurrentIndex(row)
		help_text = SettingsHelpTexts[row]
		self.descriptionText.setHtml(help_text)
	
	def modifiedSettings(self, step):
		""" Set the modified step and all following steps to red and forget
		that these steps have been completed """
		for row in range(step,6):
			self.recipeList.item(row).setIcon(self.redIcon)
		# only the steps before the modified one remain valid
		last_valid = step-1
		if last_valid < self.stepsDone:
			self.stepsDone = last_valid

	def selectDirectory(self):
		""" Let the user choose the input directory containing the SAM files.
		Nothing is changed if the dialog returns an empty name. """
		dialog = QtGui.QFileDialog()
		fname = str(dialog.getExistingDirectory(caption='Select input directory',parent=self))
		if not fname:
			return
		self.pr("Input directory for SAM files: "+fname)
		self.fileNameEdit.setText(fname)
		# the input changed, so all steps have to be run again
		self.modifiedSettings(0)

	def selectMappingFile(self):
		""" Let the user choose an ID mapping file and switch to the mapping
		file option. Nothing is changed if the dialog returns an empty name. """
		dialog = QtGui.QFileDialog()
		fname = str(dialog.getOpenFileName(caption='Load ID Mapping File',filter='Tab separated text files (*)',parent=self))
		if not fname:
			return
		self.pr("Using ID Mapping File "+fname)
		self.mappingFileEdit.setText(fname)
		# the reference table (step index 2) and all later steps are outdated
		self.modifiedSettings(2)
		self.useMappingFile.setChecked(True)

	def selectNCBICatalog(self):
		""" Let the user choose an NCBI catalog file and switch to the NCBI
		option. Nothing is changed if the dialog returns an empty name. """
		dialog = QtGui.QFileDialog()
		fname = str(dialog.getOpenFileName(directory='data/taxonomy/',caption='Select NCBI catalog file',parent=self))
		if not fname:
			return
		self.pr("Using NCBI catalog "+fname)
		self.NCBICatalogPath.setText(fname)
		# the reference table (step index 2) and all later steps are outdated
		self.modifiedSettings(2)
		self.useNCBIFiles.setChecked(True)

	def get_settings(self):
		""" Collect a dictionary containing all parameters used in the analysis.

		In addition to the values of the settings widgets, the dictionary
		contains the program version ('version') and the current local time
		('date'). 'mapping_file' and 'ncbi_file' are None unless the
		corresponding option is selected. """
		import time
		settings = dict()
		settings['input_directory'] = self.fileNameEdit.text()
		settings['min_support'] = self.minSupportBox.value()
		settings['max_read_matches'] = self.maxMatchesBox.value()
		settings['max_mapping_error'] = self.maxMappingError.value()
		settings['mapping_error_quantile'] = self.mappingErrorQuantile.value()
		settings['mapping_file'] = self.mappingFileEdit.text() if self.useMappingFile.isChecked() else None
		# set_settings() restores the NCBI option from 'ncbi_file', so it has
		# to be stored here as well
		if self.useNCBIFiles.isChecked():
			ncbi_file = self.NCBICatalogPath.text()
			# an empty path selects the packaged catalog; store the equivalent
			# placeholder, because set_settings() ignores empty values
			settings['ncbi_file'] = ncbi_file if len(ncbi_file) > 0 else '<default>'
		else:
			settings['ncbi_file'] = None
		settings['min_unique_reads'] = self.minAbsoluteUnique.value()
		settings['max_homogeneity'] = self.maxHomogeneity.value()
		settings['min_validity'] = self.minValidity.value()
		settings['usr_coverage_similarity'] = self.coverageSimilarity.value()
		settings['fraction_shared_usr'] = self.sharedFractionUSR.value()
		settings['fraction_shared_all'] = self.sharedFractionAll.value()
		settings['version'] = __version__
		settings['date'] = time.localtime()

		return settings
		
	def set_settings(self, settings):
		""" Apply a dictionary of analysis parameters to the widgets of this
		window. Parameters missing in the dictionary are set to default values. """
		lookup = settings.get
		self.fileNameEdit.setText(lookup('input_directory','.'))
		# parameters of the raw filtering
		for widget,key,default in (
				(self.minSupportBox,'min_support',1),
				(self.maxMatchesBox,'max_read_matches',100),
				(self.maxMappingError,'max_mapping_error',1.0),
				(self.mappingErrorQuantile,'mapping_error_quantile',1.0)):
			widget.setValue(lookup(key,default))
		# source of the reference names: mapping file, NCBI catalog or plain IDs
		if lookup('mapping_file',None):
			self.mappingFileEdit.setText(lookup('mapping_file'))
			self.useMappingFile.setChecked(True)
		elif lookup('ncbi_file',None):
			self.NCBICatalogPath.setText(lookup('ncbi_file'))
			self.useNCBIFiles.setChecked(True)
		else:
			self.useIdMapping.setChecked(True)
		# parameters of the quality filtering and the candidate calculation
		for widget,key,default in (
				(self.minAbsoluteUnique,'min_unique_reads',0),
				(self.maxHomogeneity,'max_homogeneity',1.0),
				(self.minValidity,'min_validity',0.0),
				(self.coverageSimilarity,'usr_coverage_similarity',0.2),
				(self.sharedFractionUSR,'fraction_shared_usr',0.1),
				(self.sharedFractionAll,'fraction_shared_all',0.8)):
			widget.setValue(lookup(key,default))


class GPSAnalyzer(QtGui.QMainWindow):
	def __init__(self):
		""" Create the main window: build the user interface, then load the
		taxonomy and the analysis modules """
		QtGui.QMainWindow.__init__(self)
		self.initUI()
		self.load_taxonomy()
		self.load_modules()

	def load_modules(self):
		""" Loads all python modules located in 'modules'. This function was
		inspired by Luca Invernizzi on http://stackoverflow.com/a/8556471

		For every module found, a GPSModule object is created with this window
		as argument and stored in self.modules under the name of the module.
		A module that fails to load is reported on the console and skipped. """
		modules_d = dict()
		for importer, mod_name, _ in pkgutil.iter_modules(modules.__path__):
			try:
				mod = importer.find_module(mod_name).load_module(mod_name)
				modules_d[mod_name] = mod.GPSModule(self)
			except Exception as e:
				# format the exception itself instead of e.message: the latter is
				# empty for exceptions with several arguments
				self.pr('<b><font color="DarkRed">Failed loading module %s</font></b><br>Message: <i>%s</i>'%(mod_name,e))
		self.modules = modules_d
		if len(self.modules) > 0:
			self.pr('Loaded modules: <i>%s</i>'%(', '.join(self.modules)))

	def initUI(self):
		""" Build the user interface of the analyzer window: the pipeline
		window, the three docks on the right hand side (graphics, tools and
		console), the central tree widget, the actions and the menu bar.
		The window is shown at the end. """
		self.pipelineGui = Pipeline(self)
		
		# Prepare the Docks on the right hand side
		# (all dock features are disabled and the title bars are replaced by empty widgets)
		self.graphicDock = QtGui.QDockWidget('Graphic',self)
		self.graphicDock.setFeatures(QtGui.QDockWidget.NoDockWidgetFeatures)
		self.graphicDock.setMinimumSize(450,300)
		self.graphicDock.setTitleBarWidget(QtGui.QWidget())
		
		self.toolsDock = QtGui.QDockWidget('Tools',self)
		self.toolsDock.setFeatures(QtGui.QDockWidget.NoDockWidgetFeatures)
		self.toolsDock.setMinimumHeight(150)
		self.toolsDock.setMaximumHeight(250)
		self.toolsDock.setTitleBarWidget(QtGui.QWidget())

		self.consoleDock = QtGui.QDockWidget('Console',self)
		self.consoleDock.setFeatures(QtGui.QDockWidget.NoDockWidgetFeatures)
		self.consoleDock.setMinimumHeight(150)
		self.consoleDock.setMaximumHeight(450)
		self.consoleDock.setTitleBarWidget(QtGui.QWidget())
		
		self.addDockWidget(QtCore.Qt.RightDockWidgetArea,self.graphicDock)
		self.addDockWidget(QtCore.Qt.RightDockWidgetArea,self.toolsDock)
		self.addDockWidget(QtCore.Qt.RightDockWidgetArea,self.consoleDock)
		
		# Fill the Graphics Dock

		self.figure = plt.figure()
		self.canvas = FigureCanvas(self.figure)
		
		# Tree widget for the taxonomy tree (second tab of the graphics dock)
		self.phyloTreeWidget = QtGui.QTreeWidget()
		self.phyloTreeWidget.setHeaderHidden(True)
		self.phyloTreeWidget.setUniformRowHeights(False)
		self.phyloTreeWidget.setAutoScrollMargin(50)
		self.phyloTreeWidget.setSelectionMode(QtGui.QAbstractItemView.ExtendedSelection)
		self.phyloTreeWidget.setSelectionBehavior(QtGui.QAbstractItemView.SelectRows)
		self.phyloTreeWidget.setMouseTracking(True)
		self.phyloTreeWidget.itemClicked.connect(self.view_tree_selection)
		self.phyloTreeWidget.setColumnCount(2)
		# adapt the width of the first column whenever a node is collapsed or expanded
		self.phyloTreeWidget.itemCollapsed.connect(lambda x: self.phyloTreeWidget.resizeColumnToContents(0))
		self.phyloTreeWidget.itemExpanded.connect(lambda x: self.phyloTreeWidget.resizeColumnToContents(0))

		self.graphicsTab = QtGui.QTabWidget()
		self.graphicsTab.setTabPosition(QtGui.QTabWidget.South)
		self.graphicsTab.addTab(self.canvas,"Figure")
		self.graphicsTab.addTab(self.phyloTreeWidget,"Tree")
		
		self.graphicDock.setWidget(self.graphicsTab)	
		
		# Fill the Tools Dock
		self.enablePlottingBox = QtGui.QCheckBox('Draw Graphics')
		self.enablePlottingBox.setChecked(True)
		self.enableTaxonomyTree = QtGui.QCheckBox('Interactive Tree')
		self.enableTaxonomyTree.setChecked(True)
		
		exportFigureQualityLabel = QtGui.QLabel('Image export quality (DPI)')
		self.exportFigureQuality = QtGui.QSpinBox()
		self.exportFigureQuality.setMinimum(10)
		self.exportFigureQuality.setMaximum(1200)
		self.exportFigureQuality.setValue(150)
		self.exportFigureQuality.setSingleStep(10)
		
		showPipelineBtn = QtGui.QPushButton('Show GPS Pipeline')
		showPipelineBtn.clicked.connect(self.pipelineGui.show)

		toolsGrid = QtGui.QGridLayout()
		toolsGrid.addWidget(self.enablePlottingBox,0,0)
		toolsGrid.addWidget(self.enableTaxonomyTree,0,1)
		toolsGrid.addWidget(exportFigureQualityLabel,1,0)
		toolsGrid.addWidget(self.exportFigureQuality,1,1)
		toolsGrid.addWidget(showPipelineBtn,2,0)

		toolsWidget = QtGui.QWidget()
		toolsWidget.setLayout(toolsGrid)
		
		# The general settings are the first tab of the tools dock
		self.toolsTab = QtGui.QTabWidget()
		self.toolsTab.currentChanged.connect(self.show_module_help)
		self.toolsTab.setTabPosition(QtGui.QTabWidget.South)
		self.toolsTab.addTab(toolsWidget,"Settings")

		self.toolsDock.setWidget(self.toolsTab)
		
		# Fill the Console Dock
		self.helpText = QtGui.QTextEdit()
		self.helpText.setReadOnly(True)
		self.consoleText = QtGui.QTextBrowser()
		self.consoleText.setReadOnly(True)
		
		# Tab 0 is the help box, tab 1 the console
		self.textTabs = QtGui.QTabWidget()
		self.textTabs.setTabPosition(QtGui.QTabWidget.South)
		self.textTabs.addTab(self.helpText,"Help")
		self.textTabs.addTab(self.consoleText,"Console")
		self.consoleDock.setWidget(self.textTabs)
		
		# Insert the main Tree Widget
		self.treeView = QtGui.QTreeWidget()
		self.treeView.setColumnCount(7)
		self.treeView.setColumnWidth(0,300)
		self.treeView.setHeaderLabels(['Name','Reads','Unique','Coverage','Validity','Homogeneity','Mapping Error'])
		self.treeView.itemClicked.connect(self.tree_item_clicked_handler)
		self.treeView.itemDoubleClicked.connect(self.expand_item)
		self.treeView.itemSelectionChanged.connect(self.selection_changed_handler)
		self.treeView.setUniformRowHeights(False)
		self.treeView.setAutoScrollMargin(50)
		self.treeView.setSelectionMode(QtGui.QAbstractItemView.ExtendedSelection)
		self.treeView.setSelectionBehavior(QtGui.QAbstractItemView.SelectRows)
		self.treeView.setMinimumWidth(400)
		self.treeView.setMouseTracking(True)
		self.treeView.header().setClickable(True)
		self.treeView.header().sectionClicked.connect(self.show_header_help)
		self.setCentralWidget(self.treeView)

		# New Analysis action
		new_analysis = QtGui.QAction(QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/run.png')), 'New Analysis', self)
		new_analysis.triggered.connect(self.new_analysis)
		
		# Load Data action
		load_data = QtGui.QAction(QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/open.png')), 'Load Data', self)
		load_data.triggered.connect(self.load_data)
		
		# Save Data action
		save_data = QtGui.QAction(QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/save.png')), 'Save Data', self)
		save_data.triggered.connect(self.save_data)
		
		# Exit Program action
		exit_program = QtGui.QAction(QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/exit.png')), 'Exit', self)
		exit_program.triggered.connect(self.close)
		
		# Export iTOL tree
		export_iTOL_tree = QtGui.QAction('Export Tree to iTOL', self)
		export_iTOL_tree.setToolTip('Export a set of files that can be loaded with the online tree visualizer iTOL.')
		export_iTOL_tree.triggered.connect(self.export_iTOL)

		# Export current graphic
		export_graphic = QtGui.QAction('Export Figure', self)
		export_graphic.setToolTip('Export the figure currently shown in the graphics panel to file.')
		export_graphic.triggered.connect(self.export_graphic)

		# Show the program information on the console
		show_program_info = QtGui.QAction('About MicrobeGPS', self)
		show_program_info.setToolTip('Show the program information on the console.')
		show_program_info.triggered.connect(self.show_program_info)

		# Load online help
		show_online_help = QtGui.QAction('Online help', self)
		show_online_help.setToolTip('Redirect to the online help of MicrobeGPS.')
		show_online_help.triggered.connect(self.show_online_help)
		
		# Show the license on the console
		show_license = QtGui.QAction('Show License', self)
		show_license.setToolTip('Show the licensing information on the console.')
		show_license.triggered.connect(self.show_license)
		
		# The figure can only be exported while 'Draw Graphics' is checked
		self.enablePlottingBox.stateChanged.connect(lambda state: export_graphic.setEnabled(bool(state)))
		# Create the Menu
		menu = self.menuBar()
		file_menu = menu.addMenu('&File')
		file_menu.addAction(new_analysis)
		file_menu.addAction(load_data)
		file_menu.addAction(save_data)
		file_menu.addSeparator()
		file_menu.addAction(exit_program)
		
		data_menu = menu.addMenu('&Data')
		data_menu.addAction(export_graphic)
		data_menu.addAction(export_iTOL_tree)

		help_menu = menu.addMenu('&Help')
		help_menu.addAction(show_program_info)
		help_menu.addAction(show_online_help)
		help_menu.addAction(show_license)
		
		# closeEvent() shows a different warning while self.saving is True
		self.saving = False
		self.settings_pipeline = None # no pipeline settings given, use default instead
		self.settings_analyzer = None # no analyzer settings given, use default instead
		
		self.setMinimumSize(900,720)

		self.setWindowTitle('MicrobeGPS '+__version__)    
		self.show()
		

	def closeEvent(self, event):
		""" Ask for confirmation before the window is closed. A stronger
		warning is shown while data is being saved. If the user confirms, the
		pipeline window is closed as well; otherwise the event is ignored. """
		if self.saving:
			quit_msg = "Data saving in progress. Are you sure you want to exit the program? Unsaved data will be lost!"
		else:
			quit_msg = "Are you sure you want to exit the program?"
		reply = QtGui.QMessageBox.question(self, 'Message',
				quit_msg, QtGui.QMessageBox.Yes, QtGui.QMessageBox.No)
		if reply != QtGui.QMessageBox.Yes:
			event.ignore()
			return
		event.accept()
		self.pipelineGui.close()

	
	def pr(self, text, switch=False):
		""" Append text to the console box and scroll down to the newest entry.
		If switch is True, the console tab is brought to the front. """
		console = self.consoleText
		console.append(text)
		scrollbar = console.verticalScrollBar()
		scrollbar.setValue(scrollbar.maximum())
		if not switch:
			return
		# tab 1 of the text tabs is the console
		self.textTabs.setCurrentIndex(1)
	
	def visualize(self,sgroups,references,reads):
		""" Show the results of an analysis in the main tree view and build the
		taxonomy tree of all observed references.

		sgroups    -- sequence of candidate groups; every group provides the
		              dictionary `members` (reference name --> reference) and the
		              values reads, unique, cov, max_val, cov_homog and map_qual
		references -- dictionary: reference name --> reference
		reads      -- the read table; only stored here for later use
		"""
		candidateIcon = QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/candidate.png'))
		sequenceIcon = QtGui.QIcon(pkg_resources.resource_filename('microbegps','data/gui/sequence.png'))
		self.treeView.clear()
		# The groups are shown in the given order
		self.sgroups = sgroups
		self.reads = reads
		self.references = references
		# Create mapping: reference name --> group ID
		self.ref2group = dict()
		for g,grp in enumerate(self.sgroups):
			for ref in grp.members:
				self.ref2group[ref] = g
		# [minimum, maximum] mapping error of all shown references
		self.mapq = [1.,0.]
		# Fill the tree widget
		for g,grp in enumerate(self.sgroups):
			# Create entry for the group
			n_mem = len(grp.members)
			# find a suitable name. Try to use LCA of all supporting references.
			taxids = [ref.name for ref in grp.members.values()]
			cand_name = taxonomy.find_lowest_common_ancestor_name(taxids, self.taxonomy_nodes, self.taxonomy_names)
			if not cand_name:
				#  find member with most unique reads -> use as representative
				cand_name = max(grp.members, key=lambda m: grp.members[m].unique)
			gr_item = QtGui.QTreeWidgetItem(self.treeView,['Cand. %i: %s (%i)'%(g+1,cand_name,n_mem),
					'%i'%grp.reads,'%i'%grp.unique, '%f'%grp.cov, '%f'%grp.max_val,
					'%f'%grp.cov_homog, '%f'%grp.map_qual],QtCore.Qt.AlignHCenter)
			gr_item.setIcon(0,candidateIcon)
			gr_item.setSizeHint(0,QtCore.QSize(0,int(self.fontMetrics().height()*1.5)))

			gr_item.i_type = "candidate"
			gr_item.reads = grp.reads
			gr_item.unique = grp.unique
			gr_item.coverage = grp.cov
			gr_item.validity = grp.max_val
			gr_item.cov_homog = grp.cov_homog
			gr_item.map_qual = grp.map_qual
			f=gr_item.font(0)
			f.setBold(True)
			gr_item.setFont(0,f)
			for i in range(1,gr_item.columnCount()):
				gr_item.setTextColor(i,QtCore.Qt.darkGray)
			# sort group members by validity (highest first)
			sorted_members = sorted(grp.members, key=lambda x: -grp.members[x].validity )
			# Add group members as sub item
			for m in sorted_members:
				mbr = grp.members[m]
				self.mapq = [min(self.mapq[0],mbr.map_qual),max(self.mapq[1],mbr.map_qual)]
				m_item = QtGui.QTreeWidgetItem(gr_item,[m,'%i'%mbr.reads,
					'%i'%mbr.unique, '%f'%mbr.coverage, '%f'%mbr.validity,
					'%f'%mbr.cov_homog, '%f'%mbr.map_qual])
				m_item.setIcon(0,sequenceIcon)
				m_item.setSizeHint(0,QtCore.QSize(0,int(self.fontMetrics().height()*1.3)))
				m_item.i_type = "reference"
				m_item.reads = mbr.reads
				m_item.unique = mbr.unique
				# NaN values would break the color calculation below
				m_item.coverage = mbr.coverage if not math.isnan(mbr.coverage) else 0.
				m_item.validity = mbr.validity if not math.isnan(mbr.validity) else 0.
				m_item.cov_homog = mbr.cov_homog
				m_item.map_qual = mbr.map_qual
				for i in range(1,m_item.columnCount()):
					m_item.setTextColor(i,QtCore.Qt.black)
				# Sub-Item: all reads
				ar_item = QtGui.QTreeWidgetItem(m_item,['All reads (%i)'%mbr.reads])
				ar_item.i_type = "all reads"
				# Sub-Item: unique reads
				ur_item = QtGui.QTreeWidgetItem(m_item,['Unique reads (%i)'%mbr.unique])
				ur_item.i_type = "unique reads"
				# Sub-Item: Top Shared Genomes
				rg_item = QtGui.QTreeWidgetItem(m_item,['Related genomes'])
				rg_item.i_type = "related"

		# Iterate over all items and colorize the fields
		mapq_min,mapq_max = self.mapq
		mapq_span = mapq_max-mapq_min
		root = self.treeView.invisibleRootItem()
		for g in range(root.childCount()):
			group = root.child(g)
			frac = int(255*(1.-group.unique/float(group.reads)))
			group.setBackground(2,QtGui.QBrush(QtGui.QColor(frac,255,frac)))
			for m in range(group.childCount()):
				m_item = group.child(m)
				frac = 255-int(m_item.unique/float(m_item.reads)*255)
				m_item.setBackground(2,QtGui.QBrush(QtGui.QColor(frac,255,frac)))
				frac = int(255*(1-m_item.validity))
				m_item.setBackground(4,QtGui.QBrush(QtGui.QColor(frac,255,frac)))
				if mapq_span != 0:
					frac = int(128.*(m_item.map_qual-mapq_min)/mapq_span)
				else:
					# all references have the same mapping error (e.g. there is
					# only one reference): avoid the division by zero
					frac = 0
				m_item.setBackground(6,QtGui.QBrush(QtGui.QColor(127+frac,255-frac,0)))

				# highlight references whose homogeneity value is below a
				# threshold that depends on the number of reads
				d_max = math.sqrt(math.log(2/0.05))/math.sqrt(2*m_item.reads)
				if m_item.cov_homog < d_max:
					m_item.setBackground(5,QtGui.QBrush(QtGui.QColor(0,0,255)))
					m_item.setTextColor(5,QtCore.Qt.white)

		# Build the taxonomy tree for all observed genomes
		self.taxid_to_item = dict()
		self.phyloTreeWidget.clear()
		for ref in self.references:
			taxid = self.references[ref].name
			if not taxid in self.taxonomy_nodes:
				continue
			lineage = taxonomy.get_lineage(taxid, self.taxonomy_nodes)
			# only show complete lineages
			if not lineage:
				continue
			# taxid 1 is represented by the tree widget itself
			self.taxid_to_item[1] = self.phyloTreeWidget
			par = None
			# walk through the lineage in reversed order; every new node is
			# attached to the node visited before
			for tid in lineage[::-1]:
				if tid == 1:
					par = self.taxid_to_item[1]
					continue
				if not tid in self.taxid_to_item:
					# add item to tree if not already found
					self.taxid_to_item[tid] = QtGui.QTreeWidgetItem(par,[self.taxonomy_names.get(tid,'[Not found]')])
				par = self.taxid_to_item[tid]

				
	def expand_item(self,item,column):
		""" Fill an item of the main tree view with its children. Connected to
		double clicks on the tree view. Items that already have children and
		items without an i_type are left unchanged.

		Depending on the i_type of the item:
		"all reads"    -- add all reads of the parent reference
		"unique reads" -- add the reads matching only the parent reference
		"read"         -- add all references matched by the read
		"match"        -- select and show the matched reference in the tree
		"related"      -- add the references of other candidates that share
		                  reads with the parent reference
		"""
		if item.childCount() > 0:
			return
		if not hasattr(item,'i_type'):
			return

		def quality_brush(map_qual):
			""" background for the mapping error column: the color depends on the
			position of map_qual in the observed range of mapping errors """
			mapq_span = self.mapq[1]-self.mapq[0]
			if mapq_span != 0:
				frac = int(128.*(map_qual-self.mapq[0])/mapq_span)
			else:
				# all references have the same mapping error: avoid the
				# division by zero
				frac = 0
			return QtGui.QBrush(QtGui.QColor(max(0,min(127+frac,255)),min(max(255-frac,0),255),0))

		if item.i_type == "all reads":
			ref_name = str(item.parent().text(0))
			self.pr("Collecting all reads for "+ref_name,False)
			# collect all read names
			reads = dict()
			for trg in self.references[ref_name].targets.values():
				for rd in trg.reads:
					reads[rd] = trg.reads[rd][2]
			for rd in reads:
				map_qual = reads[rd]
				num_matches = len(self.reads[rd].matches)
				rd_item = QtGui.QTreeWidgetItem(item,[rd,'%i'%num_matches,'','','','','%f'%map_qual])
				rd_item.i_type = "read"
				rd_item.setBackground(6,quality_brush(map_qual))
		elif item.i_type == "unique reads":
			ref_name = str(item.parent().text(0))
			self.pr("Collecting all unique reads for "+ref_name,False)
			# collect all read names
			reads = dict()
			for trg in self.references[ref_name].targets.values():
				for rd in trg.reads:
					# unique reads have exactly one match
					if len(self.reads[str(rd)].matches) == 1:
						reads[rd] = trg.reads[rd][2]
			for rd in reads:
				map_qual = reads[rd]
				rd_item = QtGui.QTreeWidgetItem(item,[rd,'','','','','','%f'%map_qual])
				rd_item.i_type = "unique read"
				rd_item.setBackground(6,quality_brush(map_qual))
		elif item.i_type == "read":
			rd_name = str(item.text(0))
			self.pr("Collecting all matches of read "+rd_name,False)
			for mtc in self.reads[rd_name].matches:
				# use the value of the first target containing the read; 1 if
				# no target of the reference contains it
				map_qual = 1
				for trg in self.references[mtc].targets.values():
					if rd_name in trg.reads:
						map_qual = trg.reads[rd_name][2]
						break
				mt_item = QtGui.QTreeWidgetItem(item,[mtc,'','','','','','%f'%map_qual])
				mt_item.i_type = "match"
				mt_item.setBackground(6,quality_brush(map_qual))
		elif item.i_type == "match":
			match_name = str(item.text(0))
			group_id = self.ref2group[match_name]
			group = self.treeView.invisibleRootItem().child(group_id)
			# find the item of the matched reference in its candidate
			sel = None
			for i in range(group.childCount()):
				if str(group.child(i).text(0)) == match_name:
					sel = group.child(i)
					break
			if sel is None:
				# nothing to jump to
				return
			self.treeView.setCurrentItem(sel)
			self.treeView.expand(self.treeView.currentIndex())
			self.treeView.scrollToItem(self.treeView.currentItem())
			self.tree_item_clicked_handler(self.treeView.currentItem(),0)
		elif item.i_type == "related":
			ref_name = str(item.parent().text(0))
			self.pr("Collecting all genomes sharing reads with "+ref_name,False)
			group_id = self.ref2group[ref_name]
			# count the shared reads for every reference of another candidate
			related = dict()
			for trg in self.references[ref_name].targets.values():
				for rd in trg.reads:
					for mtc in self.reads[rd].matches:
						if self.ref2group[mtc] != group_id:
							related[mtc] = related.get(mtc,0)+1
			# Sort by size, descending
			sorted_names = sorted([(d,related[d]) for d in related],key=lambda x:-x[1])
			for name,shared in sorted_names:
				mt_item = QtGui.QTreeWidgetItem(item,[name,'%i'%shared])
				mt_item.i_type = "match"
				frac = int(255*(1-shared/float(self.references[ref_name].reads)))
				mt_item.setBackground(1,QtGui.QBrush(QtGui.QColor(255,frac,frac)))
					
	
	def tree_item_clicked_handler(self,item,column):
		""" Handle a click on a tree item: delegate to the help text display """
		show_help = self.show_tree_help
		show_help(item,column)
				
		
	def plot_graph(self,item,column):
		""" Draw the plot that belongs to a tree item into self.figure.

		item   -- tree widget item; items without an i_type attribute are ignored
		column -- index of the clicked column (only relevant for candidates)

		"candidate" items (column 0 or 2): pie chart of the unique reads of all
		child references. "reference" items: histogram of the mapping error of
		all reads and of the unique reads. "all reads" items: number of matches
		vs. mapping error of every read of the parent reference. Every other
		combination leaves the figure untouched.
		"""
		if not hasattr(item,'i_type'):
			return

		def cached_quals(ref_item):
			""" Return (quals, uquals) of a reference item. quals holds one
			[value, number of matches] pair per read, uquals the values of the
			reads with exactly one match. Both lists are computed once and then
			cached as attributes on the item. """
			if not hasattr(ref_item,'quals') or not hasattr(ref_item,'uquals'):
				quals = []
				uquals = []
				name = str(ref_item.text(0))
				for trg in self.references[name].targets.values():
					for rd in trg.reads:
						# entry 2 of the read record is what the plots label as mapping error
						value = trg.reads[rd][2]
						n_matches = len(self.reads[rd].matches)
						quals.append([value,n_matches])
						if n_matches == 1:
							uquals.append(value)
				ref_item.quals = quals
				ref_item.uquals = uquals
			return ref_item.quals,ref_item.uquals

		if item.i_type == "candidate" and column in (0,2):
			# show the distribution of unique reads in the group
			names = [str(item.child(i).text(0)) for i in range(item.childCount())]
			srt_list = sorted([(self.references[nm].unique,nm) for nm in names],key=lambda x:-x[0])
			data = [ur for ur,nm in srt_list]
			# only the 7 largest slices get a label to keep the chart readable
			labels = [nm if i < 7 else '' for i,(ur,nm) in enumerate(srt_list)]
			self.figure.clear()
			ax = self.figure.add_subplot(111,aspect=1)
			ax.set_title('Unique Reads of %s'%str(item.text(0)))
			ax.pie(data,labels=labels)
			# lay out self.figure itself; plt.tight_layout() would act on
			# whatever figure pyplot currently considers active
			self.figure.tight_layout()
			self.canvas.draw()
		elif item.i_type == "reference":
			quals,uquals = cached_quals(item)
			self.figure.clear()
			ax = self.figure.add_subplot(111)
			ax.set_title('Distribution of Mapping Error of %s'%str(item.text(0)))
			n,bins,patches = ax.hist([e for e,m in quals],40,color='b',label='All Reads')
			# reuse the bins of the first histogram so both are comparable
			ax.hist(uquals,bins=bins,color='gold',label='Unique Reads')
			ax.legend(loc='upper right',ncol=2)
			ax.set_xlabel('Read Mapping Error')
			self.figure.tight_layout()
			self.canvas.draw()
		elif item.i_type == "all reads":
			# the values are cached on the parent (reference) item
			parent = item.parent()
			quals,uquals = cached_quals(parent)
			self.figure.clear()
			ax = self.figure.add_subplot(111)
			ax.set_title('Read Matches vs Read Error %s'%str(parent.text(0)))
			er = [e for e,m in quals]
			mt = [m for e,m in quals]
			ax.plot(mt,er,'s',alpha=0.05,ms=8,mec='b',mfc='b')
			ax.set_xlabel('Number of read matches')
			ax.set_ylabel('Mapping error on this reference')
			ax.set_xlim(xmin=0.95)
			ax.set_ylim(ymin=0, ymax=self.mapq[1]*4)
			self.figure.tight_layout()
			self.canvas.draw()
	
	def show_tree_help(self,item,column):
		""" Display the help text matching the clicked item type and column.
		Items without an i_type attribute or with an unknown type are ignored. """
		if not hasattr(item,'i_type'):
			return
		kind = item.i_type
		# help text keys of the columns that have their own text
		candidate_keys = {
			2:"candidate_unique",
			3:"candidate_coverage",
			4:"candidate_validity",
			5:"candidate_homogeneity",
			6:"candidate_error",
		}
		reference_keys = {
			1:"reference_reads",
			2:"reference_unique",
			3:"reference_coverage",
			4:"reference_validity",
			5:"reference_homogeneity",
			6:"reference_error",
		}
		# item types with a single, column independent help text
		plain_kinds = ("all reads","unique reads","related","read","unique read","match")
		if kind == "candidate":
			key = candidate_keys.get(column,"candidate")
		elif kind == "reference":
			key = reference_keys.get(column,"reference")
		elif kind in plain_kinds:
			key = kind
		else:
			return
		self.helpText.setText(HelpTexts[key])
	
	def show_header_help(self,column):
		""" Display the help text of a column when its header is clicked.
		Columns without a help text leave the help display unchanged. """
		header_keys = {
			0:"name",
			1:"candidate_reads",
			2:"candidate_unique",
			3:"candidate_coverage",
			4:"candidate_validity",
			5:"candidate_homogeneity",
			6:"candidate_error",
		}
		key = header_keys.get(column)
		if key is not None:
			self.helpText.setText(HelpTexts[key])
			
	def show_module_help(self, selectedTab):
		""" Display the help text of the module in the selected tools tab.
		Module widgets without a helpText attribute leave the display unchanged. """
		widget = self.toolsTab.widget(selectedTab)
		if not hasattr(widget,'helpText'):
			return
		self.helpText.setText(widget.helpText)

	
	def selection_changed_handler(self):
		""" Update the taxonomy tree and the plot after the tree selection changed.

		If the taxonomy tree is enabled, the read names of all selected
		references (and of all references below selected candidates) are
		collected per taxid and handed up to every ancestor taxid. If plotting
		is enabled, the taxonomy tree is colored by the share of reads per taxid
		and a plot is drawn: the regular item plot for a single selected item, a
		Venn diagram of the shared reads for two or three selected references.
		"""
		# Both are needed by the plotting section as well. They used to be
		# defined only when the taxonomy tree was enabled, which raised a
		# NameError when plotting was enabled on its own.
		sel_items = self.treeView.selectedItems()
		reads = dict()

		def reference_reads(name):
			""" Return the set of all read names of the reference called name """
			names = set()
			for trg in self.references[name].targets.values():
				names.update(trg.reads)
			return names

		if self.enableTaxonomyTree.isChecked():
			self.phyloTreeWidget.collapseAll()

			# first collect all read names for all taxids
			done = set()
			for item in sel_items:
				i_type = getattr(item,'i_type',None)
				if i_type == "reference":
					ref_names = [str(item.text(0))]
				elif i_type == "candidate":
					ref_names = [str(item.child(i).text(0)) for i in range(item.childCount())]
				else:
					continue
				for name in ref_names:
					if name in done:
						continue
					done.add(name)
					taxid = self.references[name].name
					# merge instead of overwrite: several references may share a taxid
					reads.setdefault(taxid,set()).update(reference_reads(name))

			# build the lineage of every taxid; iterate over a copy because
			# the ancestors are added to reads inside the loop
			for taxid in list(reads):
				lineage = [taxid]
				while True:
					parent = self.taxonomy_nodes.get(lineage[-1],None)
					lineage.append(parent)
					if not parent or parent == 1:
						break
				# only show complete lineages (those that end at taxid 1)
				if lineage[-1] != 1:
					continue
				for tid in lineage[1:]:
					reads[tid] = reads[taxid].union(reads.get(tid,set()))

		if self.enablePlottingBox.isChecked():
			# now color the tree: reset all items but taxid 1 first
			for taxid,item in self.taxid_to_item.items():
				if taxid != 1:
					item.setBackground(0,QtGui.QBrush(QtGui.QColor(255,255,255)))
					item.setText(1,'')
			# an empty read set at taxid 1 would lead to a division by zero
			if 1 in reads and reads[1]:
				total_reads = float(len(reads[1]))
				for taxid in reads:
					if taxid == 1:
						continue
					N_reads = len(reads[taxid])
					frac = 100 + int((1.-N_reads/total_reads)*155)
					if taxid in self.taxid_to_item:
						tax_item = self.taxid_to_item[taxid]
						tax_item.setBackground(0,QtGui.QBrush(QtGui.QColor(frac,frac,255)))
						self.phyloTreeWidget.expandItem(tax_item)
						tax_item.setText(1,'%i'%N_reads)
			self.phyloTreeWidget.resizeColumnToContents(0)

			# second create the venn plots
			if len(sel_items) == 1:
				self.plot_graph(sel_items[0],0)
			elif len(sel_items) in (2,3):
				# venn plots are only drawn if all selected items are references
				kinds = [getattr(it,'i_type',None) for it in sel_items]
				if kinds.count("reference") == len(sel_items):
					names = [str(it.text(0)) for it in sel_items]
					sets = [reference_reads(nm) for nm in names]
					self.figure.clear()
					ax = self.figure.add_subplot(111)
					if len(sets) == 2:
						reads1,reads2 = sets
						shared = len(reads1 & reads2)
						venn.venn2([len(reads1)-shared,len(reads2)-shared,shared],set_labels=names, alpha=0.5, normalize_to=1.0, ax=ax)
						ax.set_title('Shared reads')
					else:
						reads1,reads2,reads3 = sets
						# region sizes in the order 100,010,110,001,101,011,111
						regions = [len(reads1 - reads2 - reads3),
								len(reads2 - reads1 - reads3),
								len((reads1 & reads2) - reads3),
								len(reads3 - reads1 - reads2),
								len((reads1 & reads3) - reads2),
								len((reads2 & reads3) - reads1),
								len(reads1 & reads2 & reads3)]
						venn.venn3(regions,set_labels=names, alpha=0.5, normalize_to=1.0, ax=ax)
						ax.set_title('Shared Reads')
					self.canvas.draw()

	def load_taxonomy(self, fname=None):
		""" Load the NCBI taxonomy files shipped with the microbegps package.

		nodes.dmp and names.dmp are parsed in two background threads. On success
		the results are stored in self.taxonomy_nodes, self.taxonomy_ranks and
		self.taxonomy_names; on failure an error message is passed to self.pr.

		fname -- not used; the files are always read from the package resources
		"""

		class NodesLoaderThread(QtCore.QThread):
			""" Parses nodes.dmp; emits done(nodes,ranks) or failed(message) """
			done   = QtCore.pyqtSignal(dict,dict)
			failed = QtCore.pyqtSignal(str)
			def __init__(self, parent):
				QtCore.QThread.__init__(self, parent)
			def run(self):
				try:
					stream = pkg_resources.resource_stream('microbegps','data/taxonomy/nodes.dmp')
					try:
						nodes,ranks = taxonomy.parse_nodes_dmp(stream)
					finally:
						stream.close()
				except Exception as e:
					# str(e) instead of e.message: message is empty for many
					# exception types (e.g. IOError) and gone in Python 3
					self.failed.emit('<b><font color="DarkRed">Error loading NCBI taxonomy!</font></b><br>Message: <i>'+str(e)+'</i>')
					raise
				else:
					self.done.emit(nodes,ranks)
		class NamesLoaderThread(QtCore.QThread):
			""" Parses names.dmp; emits done(names) or failed(message) """
			done   = QtCore.pyqtSignal(dict)
			failed = QtCore.pyqtSignal(str)
			def __init__(self, parent):
				QtCore.QThread.__init__(self, parent)
			def run(self):
				try:
					stream = pkg_resources.resource_stream('microbegps','data/taxonomy/names.dmp')
					try:
						target = taxonomy.parse_names_dmp(stream)
					finally:
						stream.close()
				except Exception as e:
					self.failed.emit('<b><font color="DarkRed">Error loading NCBI names!</font></b><br>Message: <i>'+str(e)+'</i>')
					raise
				else:
					self.done.emit(target)

		def failed_status(msg):
			self.pr(msg)
		def set_nodes(nodes,ranks):
			self.taxonomy_nodes = nodes
			self.taxonomy_ranks = ranks
			self.pr('Loaded NCBI Taxonomy')
		def set_names(names):
			self.taxonomy_names = names
			self.pr('Loaded NCBI names')

		# the threads get self as Qt parent
		nolt = NodesLoaderThread(self)
		nolt.failed.connect(failed_status)
		nolt.done.connect(set_nodes)
		nalt = NamesLoaderThread(self)
		nalt.failed.connect(failed_status)
		nalt.done.connect(set_names)

		nolt.start()
		nalt.start()

	def new_analysis(self):
		""" Show the pipeline GUI (self.pipelineGui) """
		pipeline_gui = self.pipelineGui
		pipeline_gui.show()
				
	def save_data(self):
		""" Ask for a file name and save the current analysis to that file.

		Candidates, references, reads and both settings dictionaries are pickled
		in a background thread. self.saving is True while the thread is writing.
		The extension .gpsa is appended if the chosen file name has no extension.
		"""
		import os
		fname = str(QtGui.QFileDialog().getSaveFileName(caption='Save Data',filter="GPS Analyzer Files (*.gpsa);;All Files (*.*)",parent=self))
		if not fname:
			# dialog was cancelled
			return
		# look at the file name only: a dot in a directory name must not
		# prevent the default extension from being added
		if not os.path.splitext(fname)[1]:
			fname += '.gpsa'
		self.pr("Saving data to file "+fname)
		self.pr("Attention: this may take a while!")
		def savethisfile():
			self.saving = True
			try:
				# create a dictionary to store everything
				s_data = {'candidates':self.sgroups,
						'references':self.references,
						'reads':self.reads,
						'settings_pipeline':self.settings_pipeline,
						'settings_analyzer':self.settings_analyzer}
				# the with block closes the file even if pickling fails
				with open(fname,'wb') as handle:
					cPickle.dump(s_data,handle,protocol=-1)
			except Exception as e:
				self.pr('Error while saving file: '+str(e))
				raise
			else:
				self.pr("Finished saving data.")
			finally:
				# reset the flag on failure as well, it used to stay True
				self.saving = False
		save_thread = threading.Thread(target=savethisfile)
		save_thread.start()

				
	def load_data(self):
		""" Ask for a .gpsa file and load the analysis stored in it.

		The file is unpickled in a background thread. It must contain a
		dictionary with the keys 'candidates', 'references' and 'reads'; the
		keys 'settings_pipeline' and 'settings_analyzer' are optional. On
		success the tree view is rebuilt from the loaded data.
		"""
		dialog = QtGui.QFileDialog(self,'Load Data')
		fname = str(dialog.getOpenFileName(caption='Load Data',filter="GPS Analyzer Files (*.gpsa);;All Files (*.*)",parent=self))
		if not fname:
			# dialog was cancelled
			return
		self.pr("Loading data from file "+fname)
		self.pr("Attention: this may take a while!")
		def loadthisfile():
			try:
				# NOTE: unpickling can execute arbitrary code, only files from
				# trusted sources should be opened
				with open(fname,'rb') as handle:
					settings = cPickle.load(handle)
				# explicit check instead of assert: asserts vanish with -O
				# and carry no message for the user
				if not isinstance(settings,dict):
					raise Exception("File corrupted.")
				try:
					sgroups = settings['candidates']
					references = settings['references']
					reads = settings['reads']
				except KeyError:
					raise Exception("File corrupted.")
				settings_pipeline = settings.get('settings_pipeline',dict())
				settings_analyzer = settings.get('settings_analyzer',dict())
			except Exception as e:
				self.pr('Error while loading file: '+str(e))
				raise
			else:
				self.treeView.clear()
				self.visualize(sgroups,references,reads)
				self.settings_pipeline = settings_pipeline
				self.settings_analyzer = settings_analyzer
				self.pipelineGui.set_settings(self.settings_pipeline)
				self.pr("Finished loading data.")
		load_thread = threading.Thread(target=loadthisfile)
		load_thread.start()
			

	def export_iTOL(self):
		""" Export the whole tree to files that can be loaded with iTOL.
		http://itol.embl.de/upload.cgi

		Asks for an output directory; nothing is written if the dialog is
		cancelled. Errors are reported via self.pr and raised again. """
		try:
			outdir = str(QtGui.QFileDialog().getExistingDirectory(caption='Select iTOL output directory',parent=self))
			if not outdir:
				# dialog was cancelled: an empty path must not be used as output directory
				return
			taxonomy.candidates_to_LCA_tree(self.sgroups,self.taxonomy_nodes,self.taxonomy_names,outdir)
		except Exception as e:
			# str(e) instead of e.message, which is empty for many exception types
			self.pr('Error while saving iTOL files: '+str(e))
			raise
			
	
	def export_graphic(self):
		""" Save the current figure to a file chosen by the user. The resolution
		(DPI) is taken from exportFigureQuality. Nothing is saved if the dialog
		is cancelled. """
		file_filter = "Raster Images (*.png *.tiff *.jpg);;Vector Images (*.pdf *.eps)"
		chooser = QtGui.QFileDialog(self,'Save Graphic as ...')
		target = str(chooser.getSaveFileName(caption='Save Graphic as ...',filter=file_filter,parent=self))
		if not target:
			return
		resolution = self.exportFigureQuality.value()
		self.figure.savefig(target,dpi=resolution)
		self.pr('Saved current figure to <i>%s</i>'%target)

	def show_program_info(self):
		""" Print version, author, URL, license and a short description of
		MicrobeGPS to the console """
		# the URL line used to end with a closing </a> that was never opened
		self.pr("""<br><b><i>About MicrobeGPS</i></b><br>
		<u>Version:</u>   {version}<br>
		<u>Author:</u>   Martin Lindner, lindnerm@rki.de<br>
		<u>URL:</u>   <tt>{url}</tt><br>
		<u>License:</u>   BSD<br><br>
		MicrobeGPS is a bioinformatics tool for the analysis of metagenomic sequencing
		data. The goal is to profile the composition of metagenomic communities as 
		accurately as possible and present the results to the user in a convenient
		manner. One main focus is reliability: the tool calculates quality metrics for
		the estimated candidates and allows the user to identify false candidates
		easily.""".format(version=__version__,url='https://sourceforge.net/p/microbegps'),True)
		

	def show_license(self):
		""" print the license information to the console """
		self.pr("""<br>Copyright (c) 2014, Martin S. Lindner, LindnerM@rki.de, Germany,
All rights reserved.<br><br>MicrobeGPS is licensed under the BSD license. Please read
the LICENSE file shipped with this software.<br>""", True)

	def show_online_help(self):
		""" Open the MicrobeGPS project homepage (__url__) in the standard web
		browser, in a new tab if possible """
		from webbrowser import open as open_in_browser
		open_in_browser(__url__,new=2)


	def view_tree_selection(self, item):
		""" Show the reference that was selected in the phylogenetic tree.

		item -- item of the phylogenetic tree; its text in column 0 is used as
		        reference name

		The matching reference item in the main tree view is expanded and
		scrolled into view. Nothing happens if the name is not a known
		reference or if no item with that name is found in its group.
		"""
		ref_name = str(item.text(0))
		if ref_name not in self.ref2group:
			return
		group = self.treeView.invisibleRootItem().child(self.ref2group[ref_name])
		if group is None:
			# no top level item at the stored group index
			return
		for i in range(group.childCount()):
			candidate = group.child(i)
			if str(candidate.text(0)) == ref_name:
				self.treeView.expandItem(candidate)
				self.treeView.scrollToItem(candidate)
				return
		# no child matched: return silently, this used to fail with an
		# UnboundLocalError
		
def main():
	""" Create the Qt application and the main window and run the event loop.
	Returns the exit code of the event loop. """
	app = QtGui.QApplication(sys.argv)
	# keep a reference to the main window for the lifetime of the event loop;
	# an unreferenced top level widget may be garbage collected
	window = GPSAnalyzer()
	return app.exec_()

# Start the GUI only when this file is run as a script, not when it is imported
if __name__ == '__main__':
	main()