File: pll.h

package info (click to toggle)
iqtree 1.5.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 9,780 kB
  • ctags: 11,529
  • sloc: cpp: 96,162; ansic: 59,874; python: 242; sh: 189; makefile: 45
file content (1692 lines) | stat: -rw-r--r-- 60,057 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
/** 
 * PLL (version 1.0.0) a software library for phylogenetic inference
 * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
 *
 * Derived from 
 * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
 * trees by Alexandros Stamatakis
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * For any other enquiries send an Email to Tomas Flouri
 * Tomas.Flouri@h-its.org
 *
 * When publishing work that uses PLL please cite PLL
 *
 * ABSTRACT
 * 
 * PLL is a highly optimized, parallelized software library to ease the
 * development of new software tools dealing with phylogenetic inference. Among
 * the functions included in PLL are 
 *
 * DOCUMENTATION
 *
 * Extensive documentation for using PLL is available online at
 * 
 *                 http://www.libpll.org
 *
 *
 * USAGE
 *
 * To use PLL, 
 *
 * @file pll.h
 * @brief Data structures for tree and model 
 *
 * @author Tomas Flouri
 * @author Fernando Izquierdo-Carrasco
 * @author Andre Aberer
 * @author Alexandros Stamatakis
 */

#ifndef __pll__
#define __pll__

#include <stdint.h>
#include <stdio.h>
#include <errno.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifdef __MIC_NATIVE
#define PLL_BYTE_ALIGNMENT 64
#define PLL_VECTOR_WIDTH 8
#elif defined (__AVX)

#include <xmmintrin.h>
#include <immintrin.h>
#include <pmmintrin.h>

#define PLL_BYTE_ALIGNMENT 32
#define PLL_VECTOR_WIDTH 4

#elif defined (__SSE3)

#include <xmmintrin.h>
#include <pmmintrin.h>

#define PLL_BYTE_ALIGNMENT 16
#define PLL_VECTOR_WIDTH 2

#else
#define PLL_BYTE_ALIGNMENT 8
#define PLL_VECTOR_WIDTH 1
#endif

#ifdef _MSC_VER
	#define PLL_ALIGN_BEGIN __declspec(align(PLL_BYTE_ALIGNMENT))
	#define PLL_ALIGN_END
#else
	#define PLL_ALIGN_BEGIN
	#define PLL_ALIGN_END __attribute__((aligned(PLL_BYTE_ALIGNMENT)))
#endif


#include "stack.h"
#include "newick.h"
#include "queue.h"

#define PLL_MAX_TIP_EV                          0.999999999 /* max tip vector value, sum of EVs needs to be smaller than 1.0, otherwise the numerics break down */
#define PLL_MAX_LOCAL_SMOOTHING_ITERATIONS      32          /** @brief maximum iterations of smoothings per insert in the */
#define PLL_ITERATIONS                          10          /* maximum iterations of iterations per insert */
#define PLL_NEWZPERCYCLE                        10           /* iterations of makenewz per tree traversal */
#define PLL_NMLNGTH                             256         /* number of characters in species name */
#define PLL_DELTAZ                              0.00001     /* test of net branch length change in update */
#define PLL_DEFAULTZ                            0.9         /* value of z assigned as starting point */
#define PLL_UNLIKELY                            -1.0E300    /* low likelihood for initialization */
#define PLL_SUMMARIZE_LENGTH                    -3
#define PLL_SUMMARIZE_LH                        -2
#define PLL_NO_BRANCHES                         -1
#define PLL_MASK_LENGTH                         32
#define PLL_ZMIN                                1.0E-15  /* max branch prop. to -log(PLL_ZMIN) (= 34) */
#define PLL_ZMAX                                (1.0 - 1.0E-6) /* min branch prop. to 1.0-zmax (= 1.0E-6) */
#define PLL_TWOTOTHE256                         115792089237316195423570985008687907853269984665640564039457584007913129639936.0  /*  2**256 (exactly)  */
#define PLL_MINLIKELIHOOD                       (1.0/PLL_TWOTOTHE256)
#define PLL_MINUSMINLIKELIHOOD                  -PLL_MINLIKELIHOOD


#define PLL_FORMAT_PHYLIP                       1 
#define PLL_FORMAT_FASTA                        2
#define PLL_FORMAT_NEWICK                       3

#define PLL_NNI_P_NEXT                          1       /**< Use p->next for the NNI move */
#define PLL_NNI_P_NEXTNEXT                      2       /**< Use p->next->next for the NNI move */

#define PLL_BADREAR                             -1

#define PLL_NUM_BRANCHES                        16

#define PLL_TRUE                                1
#define PLL_FALSE                               0

#define PLL_REARRANGE_SPR                       0
#define PLL_REARRANGE_TBR                       1
#define PLL_REARRANGE_NNI                       2

#define PLL_AA_SCALE                            10.0
#define PLL_AA_SCALE_PLUS_EPSILON               10.001

/* ALPHA_MIN is critical -> numerical instability, eg for 4 discrete rate cats                    */
/* and alpha = 0.01 the lowest rate r_0 is                                                        */
/* 0.00000000000000000000000000000000000000000000000000000000000034878079110511010487             */
/* which leads to numerical problems Table for alpha settings below:                              */
/*                                                                                                */
/* 0.010000 0.00000000000000000000000000000000000000000000000000000000000034878079110511010487    */
/* 0.010000 yielded nasty numerical bugs in at least one case !                                   */
/* 0.020000 0.00000000000000000000000000000044136090435925743185910935350715027016962154188875    */
/* 0.030000 0.00000000000000000000476844846859006690412039180149775802624789852441798419292220    */
/* 0.040000 0.00000000000000049522423236954066431210260930029681736928018820007024736185030633    */
/* 0.050000 0.00000000000050625351310359203371872643495343928538368616365517027588794007897377    */
/* 0.060000 0.00000000005134625283884191118711474021861409372524676086868566926568746566772461    */
/* 0.070000 0.00000000139080650074206434685544624965062437960128249869740102440118789672851562    */
/* 0.080000 0.00000001650681201563587066858709818343436959153791576682124286890029907226562500    */
/* 0.090000 0.00000011301977332931251259273962858978301859735893231118097901344299316406250000    */
/* 0.100000 0.00000052651925834844387815526344648331402709118265192955732345581054687500000000    */

#define PLL_ALPHA_MIN                           0.02
#define PLL_ALPHA_MAX                           1000.0

#define PLL_RATE_MIN                            0.0000001
#define PLL_RATE_MAX                            1000000.0

#define PLL_LG4X_RATE_MIN                       0.0000001
#define PLL_LG4X_RATE_MAX                       1000.0

#define PLL_FREQ_MIN                            0.001

#define PLL_NUM_AA_STATES                       20
#define PLL_NUM_DNA_STATES                      4

/* 
   previous values between 0.001 and 0.000001

   TO AVOID NUMERICAL PROBLEMS WHEN FREQ == 0 IN PARTITIONED MODELS, ESPECIALLY WITH AA 
   previous value of FREQ_MIN was: 0.000001, but this seemed to cause problems with some 
   of the 7-state secondary structure models with some rather exotic small toy test datasets,
   on the other hand 0.001 caused problems with some of the 16-state secondary structure models

   For some reason the frequency settings seem to be repeatedly causing numerical problems
*/

#define PLL_ITMAX                               100    /* max number of iterations in brent's algorithm */

#define PLL_SHFT(a,b,c,d)                       (a)=(b);(b)=(c);(c)=(d);
#define PLL_SIGN(a,b)                           ((b) > 0.0 ? fabs(a) : -fabs(a))
#define PLL_ABS(x)                              (((x)<0)   ?  (-(x)) : (x))
#define PLL_MIN(x,y)                            (((x)<(y)) ?    (x)  : (y))
#define PLL_MAX(x,y)                            (((x)>(y)) ?    (x)  : (y))
#define PLL_SWAP(x,y)                           do{ __typeof__ (x) _t = x; x = y; y = _t; } while(0)
#define PLL_SWAP_PTR(x,y) do{ char* _t = x; x = y; y = _t; } while(0)
#define PLL_SWAP_INT(x,y) do{ int _t = x; x = y; y = _t; } while(0)

#define PLL_POINT_GAMMA(prob,alpha,beta)        PointChi2(prob,2.0*(alpha))/(2.0*(beta))

#define PLL_LIB_NAME                            "PLL"
#define PLL_LIB_VERSION                         "1.0.1"
#define PLL_LIB_DATE                            "November 3 2014"

/* aminoacid substitution models */
#define PLL_DAYHOFF                             0
#define PLL_DCMUT                               1
#define PLL_JTT                                 2
#define PLL_MTREV                               3
#define PLL_WAG                                 4
#define PLL_RTREV                               5
#define PLL_CPREV                               6
#define PLL_VT                                  7
#define PLL_BLOSUM62                            8
#define PLL_MTMAM                               9
#define PLL_LG                                  10
#define PLL_MTART                               11
#define PLL_MTZOA                               12
#define PLL_PMB                                 13
#define PLL_HIVB                                14
#define PLL_HIVW                                15
#define PLL_JTTDCMUT                            16
#define PLL_FLU                                 17 
#define PLL_AUTO                                18
#define PLL_LG4M                                 19
#define PLL_LG4X                                20
#define PLL_GTR                                 21  /* GTR always needs to be the last one */
#define PLL_NUM_PROT_MODELS                     22

/* information criteria for auto protein model selection */
#define PLL_AUTO_ML   0
#define PLL_AUTO_BIC  1
#define PLL_AUTO_AIC  2
#define PLL_AUTO_AICC 3

/* bipartition stuff */
#define PLL_BIPARTITIONS_RF                     4

/* scenarios for likelihood computation */
#define PLL_TIP_TIP                             0
#define PLL_TIP_INNER                           1
#define PLL_INNER_INNER                         2


/* available data types in PLL */
#define PLL_MIN_MODEL                          -1
#define PLL_BINARY_DATA                         0
#define PLL_DNA_DATA                            1
#define PLL_AA_DATA                             2
#define PLL_SECONDARY_DATA                      3
#define PLL_SECONDARY_DATA_6                    4
#define PLL_SECONDARY_DATA_7                    5
#define PLL_GENERIC_32                          6
#define PLL_GENERIC_64                          7
#define PLL_MAX_MODEL                           8

#define PLL_SEC_6_A                             0
#define PLL_SEC_6_B                             1
#define PLL_SEC_6_C                             2
#define PLL_SEC_6_D                             3
#define PLL_SEC_6_E                             4

#define PLL_SEC_7_A                             5
#define PLL_SEC_7_B                             6
#define PLL_SEC_7_C                             7
#define PLL_SEC_7_D                             8
#define PLL_SEC_7_E                             9
#define PLL_SEC_7_F                             10

#define PLL_SEC_16                              11
#define PLL_SEC_16_A                            12
#define PLL_SEC_16_B                            13
#define PLL_SEC_16_C                            14
#define PLL_SEC_16_D                            15
#define PLL_SEC_16_E                            16
#define PLL_SEC_16_F                            17
#define PLL_SEC_16_I                            18
#define PLL_SEC_16_J                            19
#define PLL_SEC_16_K                            20

#define PLL_ORDERED_MULTI_STATE                 0
#define PLL_MK_MULTI_STATE                      1
#define PLL_GTR_MULTI_STATE                     2


/* available models of rate heterogeneity in PLL */
#define PLL_CAT                                 0
#define PLL_GAMMA                               1

/* recomp */
#define PLL_SLOT_UNUSED                        -2  /* value to mark an available vector */
#define PLL_NODE_UNPINNED                      -3  /* marks an inner node as not available in RAM */
#define PLL_INNER_NODE_INIT_STLEN              -1  /* initialization */

#define PLL_MIN_RECOM_FRACTION     0.1 /* at least this % of inner nodes will be allocated in RAM */
#define PLL_MAX_RECOM_FRACTION     1.0 /* always 1, just there for boundary checks */


typedef  int pllBoolean;

/* @brief PLL instance attribute structure */
typedef struct
{
  int rateHetModel;
  int fastScaling;
  int saveMemory;
  int useRecom;
  long randomNumberSeed;
  int numberOfThreads;
} pllInstanceAttr;

/** @brief Stores the recomputation-state of likelihood vectors  */
typedef struct
{
  int numVectors;      /**< Number of inner vectors allocated in RAM*/
  int *iVector;        /**< size: numVectors, stores node id || PLL_SLOT_UNUSED  */
  int *iNode;          /**< size: inner nodes, stores slot id || PLL_NODE_UNPINNED */
  int *stlen;          /**< Number of tips behind the current orientation of the indexed inner node (subtree size/cost) */ 
  int *unpinnable;     /**< size:numVectors , TRUE if we dont need the vector */
  int maxVectorsUsed;  
  pllBoolean allSlotsBusy; /**< on if all slots contain an ancesctral node (the usual case after first full traversal) */ 
} recompVectors;
/* E recomp */

/** @brief ???
 * @todo add explanation, is this ever used?  */
 
typedef unsigned int hashNumberType;



/*typedef uint_fast32_t parsimonyNumber;*/

#define PLL_PCF 32

/** @brief ???Hash tables 
 * @todo add explanation of all hash tables  */
typedef struct pllBipartitionEntry
{
  unsigned int *bitVector;
  unsigned int *treeVector;
  unsigned int amountTips;
  int *supportVector;
  unsigned int bipNumber;
  unsigned int bipNumber2;
  unsigned int supportFromTreeset[2]; 
  struct pllBipartitionEntry *next;
} pllBipartitionEntry;

//typedef struct
//{
//  hashNumberType tableSize;
//  entry **table;
//  hashNumberType entryCount;
//}
//  hashtable;
//struct stringEnt
//{
//  int nodeNumber;
//  char *word;
//  struct stringEnt *next;
//};
//
//typedef struct stringEnt stringEntry;
//typedef struct
//{
//  hashNumberType tableSize;
//  stringEntry **table;
//}
//  stringHashtable;

typedef struct pllHashItem
{
  void * data;
  char * str;
  struct pllHashItem * next;
} pllHashItem;

typedef struct pllHashTable
{
  unsigned int size;
  struct pllHashItem ** Items;
  unsigned int entries;
} pllHashTable;




/** @brief Per-site Rate category entry: likelihood per-site and CAT rate applied ???
  *
  */
typedef struct ratec
{
  double accumulatedSiteLikelihood;
  double rate;
}rateCategorize;

/** @brief Traversal descriptor entry.
  * 
  * Contains the information required to execute an operation in a step of the tree traversal.
  * q   r
  *  \ /
  *   p
  *
  * The entry defines 2 input/parent nodes (q and r) and one output/child node (p)
  * qz represents the branch length(s) of the branch connecting q and p
  * rz represents the branch length(s) of the branch connecting r and p
  * PLL_TIP_TIP     Both p and r are tips
  * PLL_INNER_INNER Both p and r are inner nodes
  * @note PLL_TIP_INNER   q is a tip and r is an inner node (by convention, flip q and r if required)
  */
typedef struct
{
  int tipCase;                  /**< Type of entry, must be PLL_TIP_TIP PLL_TIP_INNER or PLL_INNER_INNER */
  int pNumber;                  /**< should exist in some nodeptr p->number */
  int qNumber;                  /**< should exist in some nodeptr q->number */
  int rNumber;                  /**< should exist in some nodeptr r->number */
  double qz[PLL_NUM_BRANCHES];
  double rz[PLL_NUM_BRANCHES];
  /* recom */
  int slot_p;                   /**< In recomputation mode, the RAM slot index for likelihood vector of node p, otherwise unused */
  int slot_q;                   /**< In recomputation mode, the RAM slot index for likelihood vector of node q, otherwise unused */
  int slot_r;                   /**< In recomputation mode, the RAM slot index for likelihood vector of node r, otherwise unused */
  /* E recom */
} traversalInfo;

/** @brief Traversal descriptor.
  * 
  * Describes the state of a traversal descriptor
  */
typedef struct
{
  traversalInfo *ti;              /**< list of traversal steps */
  int count;                      /**< number of traversal steps */
  int functionType;
  pllBoolean traversalHasChanged;   
  pllBoolean *executeModel;           
  double  *parameterValues;
} traversalData;

/** @brief Node record structure
  * 
  * Each inner node is a trifurcation in the tree represented as a circular list containing 3 node records. One node record uniquely identifies a subtree, and the orientation of the likelihood vector within a node
  *
  * p1 -------> p2 ----> to the next node
  * ^           |
  * |-----p3<---|          
  * 
  */
struct noderec;

/** @brief Branch length information.
  * 
  * @todo add relevant info on where this is used ???
  */
typedef struct
{
  unsigned int *vector; 
  int support;   
  struct noderec *oP;
  struct noderec *oQ;
} branchInfo;





/** @brief Linkage of partitions.
  * 
  * @todo add relevant info on where this is used ???
  */
typedef struct
{
  pllBoolean valid;
  int partitions;  
  int *partitionList;
}
  linkageData;
typedef struct
{
  int entries;
  linkageData* ld;
}
  linkageList;



  /** 
   *
   * the data structure below is fundamental for representing trees 
     in the library!

     Inner nodes are represented by three instances of the nodeptr data structure that is linked 
     via a cyclic list using the next pointer.

     So for building an inner node of the tree we need to allocate three nodeptr 
     data structures and link them together, e.g.:

     assuming that we have allocated space for an inner node 
     for nodeptr pointers p1, p2, p3, 

     we would then link them like this:

     p1->next = p2;
     p2->next = p3;
     p3->next = p1;

     also note that the node number that identifies the inner node 
     needs to be set to the same value.

     for n taxa, tip nodes are enumarated/indexed from 1....n,
     and inner node inbdices start at n+1. Assuming that we have 10 taxa 
     and this is our first inner node, we'd initialize the number as follows:

     p1->number = 11;
     p2->number = 11;
     p3->number = 11;

     Note that the node number is important for indexing tip sequence data as well as inner likelihood vectors 
     and that it is this number (the index) that actually gets stored in the traversal descriptor.

     Tip nodes are non-cyclic nodes that simply consist of one instance/allocation of nodeptr.

     if we have allocated a tip data structure nodeptr t1, 
     we would initialize it as follows:

     t1->number = 1;

     t1->next = NULL;

     now let's assume that we want to build a four taxon tree with tips t1, t2, t3, t4 
     and inner nodes (p1,p2,p3) and (q1,q2,q3).

     we first build the tips:

     t1->number = 1;
     t1->next = NULL;
     
     t2->number = 2;
     t2->next = NULL;

     t3->number = 3;
     t3->next = NULL;

     t4->number = 4;
     t4->next = NULL;
     
     now the first inner node

     p1->next = p2;
     p2->next = p3;
     p3->next = p1;    

     p1->number = 5;
     p2->number = 5;
     p3->number = 5;

     and the second inner node.

     q1->next = q2;
     q2->next = q3;
     q3->next = q1;    

     q1->number = 6;
     q2->number = 6;
     q3->number = 6;
     
     now we need to link the nodes together such that they form a tree, let's assume we want ((t1,t2), (t3, t4));

     we will have to link the nodes via the so-called back pointer,
     i.e.:

     let's connect node p with t1 and t2

     t1->back = p1;
     t2->back = p2;

     and vice versa:

     p1->back = t1;
     p2->back = t2;

     let's connect node p with node q:

     p3->back = q3;

     and vice versa:

     q3->back = p3;

     and now let's connect node q with tips t3 and t4:

     q1->back = t3;
     q2->back = t4;

     and vice versa:

     t3->back = q1;
     t4->back = q2;

     What remains to be done is to set up the branch lengths.
     Using the data structure below, we always have to store the 
     branch length twice for each "topological branch" unfortunately.

     Assuming that we are only estimating a single branch across all partitions 
     we'd just set the first index of the branch length array z[PLL_NUM_BRANCHES].

     e.g., 

     t3->z[0] = q1->z[0] = 0.9;

     the above operation for connecting nodes is implemented in functions hookup() which will set 
     the back pointers of two nodes that are to be connected as well as the branch lengths.

     The branchInfo data field is a pointer to a data-structure that stores meta-data and requires 
     the tree not to change while it is being used.
     
     Also, this pointer needs to be set by doing a full tree traversal on the tree.

     Note that q1->bInf == t3->bInf in the above example.

     The hash number is used for mapping bipartitions to a hash table as described in the following paper:

     A. Aberer, N. Pattengale, A. Stamatakis: "Parallelized phylogenetic post-analysis on multi-core architectures". Journal of Computational Science 1, 107-114, 2010.
     
     The support data field stores the support value for the branch associated with each nodeptr structure.
     Note that support always refers to branches. 

     Thus for consistency, q3->support must be equal to p3->support;

     Finally, the three char fields x, xPars and xBips are very very important!

     They are used to denote the presence/absence or if you want, direction of the 
     parsimony, bipartition, or likelihood vector at a node with respect to the virtual root.

     Essentially, they are just used as single presence/absence bits and ONLY for inner nodes!

     When setting up new inner nodes, one of the three pointers in the cyclic list must 
     have x = 1 and the other two x = 0;

     in the above example we could set:

     p1->x = 0;
     p2->x = 0;
     p3->x = 1;

     q1->x = 0;
     q2->x = 0;
     q3->x = 1;

     This would mean that the virtual root is located at the inner branch of the four taxon tree ((t1,t2),(t3,t4));

     When we re-root the tree at some other branch we need to update the location of the x pointer that is set to 1.

     This means if we root the tree at the branch leading to t1 we would set 

     p1->x = 1;
     p2->x = 0;
     p3->x = 0;

     the values for q remaon unchanged since q3 is still pointing toward the root.

     When we re-locate the root to branch p1 <-> t1 the fact that we have to "rotate" the x value that is set to 1
     to another node of the cyclic list representing the abstract topological node p, also tells us that we 
     need to re-compute the conditional likelihood array for p. 

     Note that, only one likelihood or parsimony array is stored per inner node and the location of x essentially tells us which subtree 
     it summarizes, if p1->x == 1, it summarizes subtree (t2, (t3, t4)), if p3->x = 1 the likelihood vector associated with 
     node p summarizes subtree (t1, t2).

     @todo I think we should rename the back pointer. It's not back, it can be forward depending on the orientation. We should renmae it to outer. Back is too confusing, I would assume it's the opposite of next, i.e. previous.

     @struct noderec

     @brief Tree node record

     A node in a tree is a structure which contains a cyclic list of pointers to 3 nodes which we call a \e roundabout. The first node is the structure itself, and the other two nodes are accessed via \a noderec->next and \a noderec->next->next. To access the outer node with which each of the 3 nodes forms an edge one has to use the \a back pointer

     @var noderec::next
     @brief Next node in the roundabout

     @var noderec::back
     @brief Outer node

     @var noderec::number
     @brief Node identifier

     In general, tips (i.e. leaves) are numbered from 1 to \e n where \e n is the number of taxa. Identifiers for internal nodes start from \e n + 1. Note
     that for a given inner node, the identifier must be the same for all 3 nodes that compose it.

     @var info::z
     @brief The branch lengths per partition for the main node in the roundabout

     @todo Append an image
  */
typedef  struct noderec
{
 
  branchInfo      *bInf;
  double           z[PLL_NUM_BRANCHES];
  struct noderec  *next;        
  struct noderec  *back;       
  hashNumberType   hash;
  int              support;
  int              number;    
  char             x;
  char             xPars;
  char             xBips;
}
  node, *nodeptr;

typedef unsigned int parsimonyNumber;

/* @brief Alignment, transition model, model of rate heterogenety and likelihood vectors for one partition.
  * 
  * @todo De-couple into smaller data structures
  *
  * ALIGNMENT DATA 
  * This depends only on the type of data in this partition of the alignment 
  *
  * MODEL OF RATE HETEROGENETY, We use either GAMMA or PSR 
  * Rate heterogenety: Per Site Categories (PSR) model aka CAT, 
  * Rate of site i is given by perSiteRates[rateCategory[i]]
  *
  * TRANSITION MODEL: We always assume General Time Reversibility 
  * Transistion probability matrix: P(t) = exp(Qt)
  * Branch length t is the expected number of substitutions per site 
  * Pij(t) is the probability of going from state i to state j in a branch of length t 
  * Relative substitution rates (Entries in the Q matrix) 
  * In GTR we can write Q = S * D, where S is a symmetrical matrix and D a diagonal with the state frequencies 

    @var protModels
    @brief Protein models

    @detail Detailed protein models descriptiopn

    @var autoProtModels
    @brief Auto prot models
    @detail Detailed auto prot models
  */
 


/** @struct pInfo
    
    @brief Partition information structure

    This data structure encapsulates all properties and auxiliary variables that together
    consist a partition.

    @var pInfo::dataType
    @brief Type of data this partition contains

    Can be DNA (\b PLL_DNA_DATA) or AminoAcid (\b PLL_AA_DATA) data

    @var pInfo::states
    @brief Number of states

    Number of states this type of data can consist of

    @var pInfo::maxTipStates
    @brief Number of undetermined states (possible states at the tips)

    This is the total number of possible states that can appear in the alignment. This includes degenerate (undetermined) bases

    @var pInfo::partitionName
    @brief Name of partition

    A null-terminated string describing the name of partition

    @var pInfo::lower
    @brief Position of the first site in the alignment that is part of this partition [1, tr->originalCrunchedLength]

    @var pInfo::upper
    @brief Position of the last site that is part of this partition plus one (i.e. position of the first site that is not part of this partition) 

    @var pInfo::width
    @brief Number of sites in the partition (i.e. \a upper - \a lower)

    @var pInfo::wgt
    @brief Weight of site

    Number of times this particular site appeared in the partition before the duplicates were removed and replaced by this weight

    @var pInfo::empiricalFrequencies
    @brief Empirical frequency of each state in the current partition

    @var pInfo::perSiteRates
    @brief Per Site Categories (PSR) or aka CAT values for each rate

    @var pInfo::rateCategory
    @brief CAT category index for each site

    @var pInfo::numberOfCategories
    @brief CAT size of the set of possible categories

    @var pInfo::alpha
    @brief Gamma parameter to be optimized
    
    @var pInfo::gammaRates
    @brief Values of the 4 gamma categories (rates) computed given an alpha

    @var pInfo::substRates
    @brief Entries of substitution matrix, e.g. 6 free parameters in DNA

    In GTR we can write \f$ Q = S * D \f$, where \f$ S \f$ is a symmetrical matrix and \f$ D \f$ a diagonal with the state frequencies,
    which is represented by the array \a frequencies. The symmetrical matrix is the array \a substRates

    @var pInfo::frequencies
    @brief State frequencies, entries in D are initialized as empiricalFrequencies
    
    In GTR we can write \f$ Q = S * D \f$, where \f$ S \f$ is a symmetrical matrix and \f$ D \f$ a diagonal with the state frequencies,
    which is represented by the array \a frequencies. The symmetrical matrix is the array \a substRates

    @var pInfo::freqExponents

    @var pInfo::EIGN
    @brief Eigenvalues of Q matrix

    @var pInfo::EV
    @brief Eigenvectors of Q matrix

    @var pInfo::EI
    @brief Inverse eigenvectors of Q matrix

    @var pInfo::left
    @brief P matrix for the left term of the conditional likelihood equation

    @var pInfo::right
    @brief P matrix for the right term of the conditional likelihood equation

    @var pInfo::tipVector
    @brief Precomputed (based on current P matrix) conditional likelihood vectors for every possible base 

    @var pInfo::EIGN_LG4
    @brief Eigenvalues of Q matrix for the LG4 model

    @var pInfo::EV_LG4
    @brief Eigenvectors of Q matrix for the LG4 model

    @var pInfo::EI_LG4
    @brief Inverse eigenvectors of Q matrix for the LG4 model
    
    @var pInfo::frequencies_LG4
    @brief State frequencies for the LG4 model

    @var pInfo::tipVector_LG4
    @brief Precomputed (based on current P matrix) conditional likelihood vectors for every possible base for the LG4 model

    @var pInfo::substRates_LG4
    @brief Entries of substitution matrix for the LG4 model

    @var pInfo::protModels
    @brief Protein model for current partition

    In case \a pInfo::dataType is set to \a PLL_AA_DATA then \a protModels indicates the index in the global array \a protModels
    of the protein model that the current partition uses.

    @var pInfo::autoProtModels
    @brief Best fitted protein model for the \b PLL_AUTO partitions

    If \a protModels is set to \b PLL_AUTO then \a autoProtModels holds the currently detected best fitting protein model for the partition

    @var pInfo::protUseEmpiricalFreqs

    @var pInfo::nonGTR

    @var pInfo::optimizeBaseFrequencies

    @var pInfo::optimizeAlphaParameter

    @var pInfo::optimizeSubstitutionRates

    @var pInfo::symmetryVector

    @var pInfo::frequencyGrouping


    @todo
      Document freqExponents

*/



typedef struct {
  int     dataType;
  int     states;
  int     maxTipStates;
  char   *partitionName;
  int     lower;
  int     upper;
  int     width;
  int    *wgt;
  double *empiricalFrequencies; 


  /* MODEL OF RATE HETEROGENETY, We use either GAMMA or PSR */
  /* Rate heterogenety: Per Site Categories (PSR) model aka CAT, see updatePerSiteRates() */
  /* Rate of site i is given by perSiteRates[rateCategory[i]] */
  double *perSiteRates;
  int    *rateCategory;
  int     numberOfCategories;
  /* Rate heterogenety: GAMMA model of rate heterogenety */
  double alpha;
  double *gammaRates;


  /* TRANSITION MODEL: We always assume General Time Reversibility */
  /* Transistion probability matrix: P(t) = exp(Qt)*/
  /* Branch length t is the expected number of substitutions per site */
  /* Pij(t) is the probability of going from state i to state j in a branch of length t */
  /* Relative substitution rates (Entries in the Q matrix) */
  /* In GTR we can write Q = S * D, where S is a symmetrical matrix and D a diagonal with the state frequencies */
  double *substRates;       /**< TRANSITION MODEL Entries in S, e.g. 6 free parameters in DNA */   
  double *frequencies;      /**< State frequencies, entries in D, are initialized as empiricalFrequencies */
  double *freqExponents;
  /* Matrix decomposition: @todo map this syntax to Explanation of the mathematical background */
  double *EIGN;
  double *EV;
  double *EI;
  double *left;
  double *right;
  double *tipVector;


  /* asc bias */
  pllBoolean       ascBias;
  int           ascOffset;
  int         * ascExpVector;
  double      * ascSumBuffer;
  double      * ascVector;
  double        ascScaler[64];
  
  /* LG4 */

  double *EIGN_LG4[4];
  double *EV_LG4[4];
  double *EI_LG4[4];

  double *frequencies_LG4[4];
  double *tipVector_LG4[4];
  double *substRates_LG4[4];
  
  /* LG4X */

  double lg4x_weights[4];
  double lg4x_weightExponents[4];
  double lg4x_weightsBuffer[4];
  double lg4x_weightExponentsBuffer[4];
  double lg4x_weightLikelihood;
  
  /* Protein specific */
  int     protModels;			/**< Empirical model matrix */
  int     autoProtModels;		/**< Model selected with "auto" protein model */
  int     protUseEmpiricalFreqs;	/**< Whether to use empirical frequencies for protein model */

  pllBoolean nonGTR;
  pllBoolean optimizeBaseFrequencies;	/**< Whether to optimize base frequencies */
  pllBoolean optimizeAlphaParameter;	/**< Whether to optimize alpha parameters and gamma rates */
  pllBoolean optimizeSubstitutionRates;	/**< Whether to optimize substitution rates */
  int    *symmetryVector;		/**< Specify linkage between substitution rate parameters */
  int    *frequencyGrouping;

  /* LIKELIHOOD VECTORS */

  /* partial LH Inner vectors  ancestral vectors, we have 2*tips - 3 inner nodes */
  double          **xVector;          /**< Conditional likelihood vectors for inner nodes */
  unsigned char   **yVector;          /**< Tip entries (sequence) for tip nodes */
  unsigned int     *globalScaler;     /**< Counters for scaling operations done at node i */

  /* data structures for conducting per-site likelihood scaling.
     this allows to compute the per-site log likelihood scores 
     needed for RELL-based bootstrapping and all sorts of statistical 
     tests for comparing trees ! */
  int              **expVector;     /**< @brief An entry per inner node. Each element is an array of size the number of sites in the current partition and represents how many times the respective site has been scaled in the subtree rooted at the current node */
  size_t           *expSpaceVector; /**< @brief Each entry represents an inner node and states the size of the corresponding element in \a expVector, which is the number of sites for the current partition */

  /* These are for the saveMemory option (tracking gaps to skip computations and memory) */
  size_t           *xSpaceVector;       /* Size of conditional likelihood vectors per inner node */
  int               gapVectorLength;    /** Length of \a gapVector bitvector in unsigned integers assuming that \a unsigned \a int is 32bits. It is set to partition size / 32 */
  unsigned int     *gapVector;          /** A bit vector of size \a gapVectorLength * 32 bits. A bit is set to 1 if the corresponding */
  double           *gapColumn; 

  /* Parsimony vectors at each node */
  size_t parsimonyLength;
  parsimonyNumber *parsVect; 
  parsimonyNumber *perSiteParsScores;

  /* This buffer of size width is used to store intermediate values for the branch length optimization under 
     newton-raphson. The data in here can be re-used for all iterations irrespective of the branch length.
   */
  double *sumBuffer; 

  /* Buffer to store the per-site log likelihoods */
  double *perSiteLikelihoods;

  /* This buffer of size width is used to store the ancestral state at a node of the tree. */
  double *ancestralBuffer;

  /* From tree */
  pllBoolean executeModel;
  double fracchange;
  double rawFracchange;
  double partitionContribution;
  double partitionWeight;
  double partitionLH;

// #if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
  int partitionAssignment;
// #endif

} pInfo;

typedef struct
 {
   pInfo **partitionData;
   int numberOfPartitions;
   pllBoolean perGeneBranchLengths;
   pllBoolean dirty;
   linkageList *alphaList;
   linkageList *rateList;
   linkageList *freqList;
 }  partitionList;



#define PLL_REARR_SETTING 1
#define PLL_FAST_SPRS     2
#define PLL_SLOW_SPRS     3


/** @brief Checkpointing states. 
 * 
 * @todo Raxml specific 
  */
typedef struct {
 
  int state;

  /*unsigned int vLength;*/
  double accumulatedTime;  
  int rearrangementsMax;
  int rearrangementsMin;
  int thoroughIterations;
  int fastIterations;
  int mintrav;
  int maxtrav;
  int bestTrav;
  double startLH; 
  double lh;
  double previousLh;
  double difference;
  double epsilon;  
  pllBoolean impr;
  pllBoolean cutoff;  
       
  double tr_startLH;
  double tr_endLH;
  double tr_likelihood;
  double tr_bestOfNode;  
  double tr_lhCutoff;
  double tr_lhAVG;
  double tr_lhDEC;
  int    tr_NumberOfCategories;
  int    tr_itCount;  
  int    tr_doCutoff;
  int    tr_thoroughInsertion;
  int    tr_optimizeRateCategoryInvocations;
 
  /* prevent users from doing stupid things */
 
  int searchConvergenceCriterion;
  int rateHetModel;
  int maxCategories;
  int NumberOfModels;
  int numBranches;
  int originalCrunchedLength;    
  int mxtips;
  char seq_file[1024];
} checkPointState;



/* recomp */
#ifdef _DEBUG_RECOMPUTATION
typedef struct {
  unsigned long int numTraversals;
  unsigned long int tt;
  unsigned long int ti;
  unsigned long int ii;
  unsigned int *travlenFreq;
} traversalCounter;
#endif
/* E recomp */


/** @brief Tree topology.
 * 
 * @todo Apart from the topology this structure contains several fields that act like global variables in raxml
  */
typedef  struct  {

  int *ti;

  /* recomp */
  recompVectors *rvec;            /**< this data structure tracks which vectors store which nodes */
  float maxMegabytesMemory;       /**< User says how many MB in main memory should be used */
  float vectorRecomFraction;      /**< vectorRecomFraction ~= 0.8 * maxMegabytesMemory  */
  pllBoolean useRecom;               /**< ON if we apply recomputation of ancestral vectors*/
#ifdef _DEBUG_RECOMPUTATION 
  traversalCounter *travCounter;
  double stlenTime;
#endif
  /* E recomp */
  
  pllBoolean fastScaling;
  pllBoolean saveMemory;
  int              startingTree;
  long             randomNumberSeed;

  double          *lhs;         /**< Array to store per-site log likelihoods of \a originalCrunchedLength (compressed) sites */
  double          *patrat;      /**< rates per pattern */
  double          *patratStored; 
  int             *rateCategory;
  int             *aliaswgt;    /**< weight by pattern */ 
  pllBoolean    manyPartitions;

  pllBoolean grouped;              /**< No idea what this is, but is always set to PLL_FALSE */
  pllBoolean constrained;          /**< No idea what this is, but is always set to PLL_FALSE */
  int threadID;
  volatile int numberOfThreads;

//#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
 
  unsigned char *y_ptr; 
  
  double lower_spacing;
  double upper_spacing; 

  double *ancestralVector;

//#endif
  
  pllHashTable     *nameHash;
  char           ** tipNames;

  char             *secondaryStructureInput;

  traversalData    td[1];

  int              maxCategories;
  int              categories;

  double           coreLZ[PLL_NUM_BRANCHES];
  
 
  branchInfo       *bInf;

  int              multiStateModel;


  pllBoolean curvatOK[PLL_NUM_BRANCHES];

  /* the stuff below is shared among DNA and AA, span does
     not change depending on datatype */

  /* model stuff end */
  unsigned char    **yVector;        /**< list of raw sequences (parsed from the alignment)*/

  int              secondaryStructureModel;
  int              originalCrunchedLength; /**< Length of alignment after removing duplicate sites in each partition */

  int              *secondaryStructurePairs;

  double            fracchange;      /**< Average substitution rate */
  double            rawFracchange;
  double            lhCutoff;
  double            lhAVG;
  unsigned long     lhDEC;
  unsigned long     itCount;
  int               numberOfInvariableColumns;
  int               weightOfInvariableColumns;
  int               rateHetModel;

  double           startLH;
  double           endLH;
  double           likelihood;           /**< last likelihood value evaluated for the current topology */
 
  node           **nodep;                /**< pointer to the list of nodes, which describe the current topology */
  nodeptr          nodeBaseAddress;
  node            *start;                /**< starting node by default for full traversals (must be a tip contained in the tree we are operating on) */
  int              mxtips;  /**< Number of tips in the topology */

  int              *constraintVector;   /**< @todo What is this? */
  int              numberOfSecondaryColumns;
  pllBoolean          searchConvergenceCriterion;
  int              ntips;
  int              nextnode;  

  pllBoolean          bigCutoff;
  pllBoolean          partitionSmoothed[PLL_NUM_BRANCHES];
  pllBoolean          partitionConverged[PLL_NUM_BRANCHES];
  pllBoolean          rooted;
  pllBoolean          doCutoff;
 
  double         gapyness;

  char **nameList;     /**< list of tips names (read from the phylip file) */
  char *tree_string;   /**< the newick representaion of the topology */
  char *tree0;
  char *tree1;
  int treeStringLength;
 
  unsigned int bestParsimony;
  unsigned int *parsimonyScore;
  
  double bestOfNode;
  nodeptr removeNode;   /**< the node that has been removed. Together with \a insertNode represents an SPR move */
  nodeptr insertNode;   /**< the node where insertion should take place . Together with \a removeNode represents an SPR move*/

  double zqr[PLL_NUM_BRANCHES];
  double currentZQR[PLL_NUM_BRANCHES];

  double currentLZR[PLL_NUM_BRANCHES];
  double currentLZQ[PLL_NUM_BRANCHES];
  double currentLZS[PLL_NUM_BRANCHES];
  double currentLZI[PLL_NUM_BRANCHES];
  double lzs[PLL_NUM_BRANCHES];
  double lzq[PLL_NUM_BRANCHES];
  double lzr[PLL_NUM_BRANCHES];
  double lzi[PLL_NUM_BRANCHES];


  unsigned int **bitVectors;

  unsigned int vLength;

  pllHashTable *h;                 /**< hashtable for ML convergence criterion */
  //hashtable *h;
 
  int optimizeRateCategoryInvocations;

  checkPointState ckp;
  pllBoolean thoroughInsertion; /**< true if the neighbor branches should be optimized when a subtree is inserted (slower)*/
  pllBoolean useMedian;

  int autoProteinSelectionType;

  pllStack * rearrangeHistory;


  /* analdef defines */
  /* TODO: Do some initialization */
  int              bestTrav;            /**< best rearrangement radius */
  int              max_rearrange;       /**< max. rearrangemenent radius */
  int              stepwidth;           /**< step in rearrangement radius */
  int              initial;             /**< user defined rearrangement radius which also sets bestTrav if initialSet is set */
  pllBoolean          initialSet;          /**< set bestTrav according to initial */
  int              mode;                /**< candidate for removal */
  pllBoolean        perGeneBranchLengths;
  pllBoolean        permuteTreeoptimize;   /**< randomly select subtrees for SPR moves */
  pllBoolean        compressPatterns;
  double         likelihoodEpsilon;
  pllBoolean        useCheckpoint;

} pllInstance;

/** @brief Stores data related to a NNI move  */
typedef struct {
        pllInstance * tr;
        nodeptr p;
        int nniType;
        double z[PLL_NUM_BRANCHES]; // optimize branch lengths
        double z0[PLL_NUM_BRANCHES]; // unoptimized branch lengths
        double likelihood;
        double deltaLH;
} nniMove;

/***************************************************************/

typedef struct {
  int partitionNumber;
  int partitionLength;
} partitionType;

typedef struct
{
  double z[PLL_NUM_BRANCHES];
  nodeptr p, q;
  int cp, cq;
}
  connectRELL, *connptrRELL;

typedef  struct
{
  connectRELL     *connect; 
  int             start;
  double          likelihood;
}
  topolRELL;


typedef  struct
{
  int max;
  topolRELL **t;
}
  topolRELL_LIST;

/**************************************************************/

/** @brief Connection within a topology.
*   */
typedef struct conntyp {
    double           z[PLL_NUM_BRANCHES];           /**< branch length */
    node            *p, *q;       /**< parent and child sectors */
    void            *valptr;      /**< pointer to value of subtree */
    int              descend;     /**< pointer to first connect of child */
    int              sibling;     /**< next connect from same parent */
    } pllConnect, *connptr;

/** @brief Single Topology
*   */
typedef  struct {
    double           likelihood;
    int              initialTreeNumber;
    pllConnect         *links;       /**< pointer to first connect (start) */
    node            *start;
    int              nextlink;    /**< index of next available connect */
                                  /**< tr->start = tpl->links->p */
    int              ntips;
    int              nextnode;    /**< next available inner node for tree parsing */
    int              scrNum;      /**< position in sorted list of scores */
    int              tplNum;      /**< position in sorted list of trees */
    } topol;

/** @brief small helper data structure for printing out/downstream use of marginal ancestral probability vectors.
*
* it is allocated as an array that has the same length as the input alignment and can be used to 
*   index the ancestral states for each position/site/pattern 
*   */
typedef struct {
  double *probs; /**< marginal ancestral states */
  char c; /**< most likely stated, i.e. max(probs[i]) above */
  int states; /**< number of states for this position */
} ancestralState;

/** @brief List of topologies
*
*   */
typedef struct {
    double           best;        /**< highest score saved */
    double           worst;       /**< lowest score saved */
    topol           *start;       /**< starting tree for optimization */
    topol          **byScore;
    topol          **byTopol;
    int              nkeep;       /**< maximum topologies to save */
    int              nvalid;      /**< number of topologies saved */
    int              ninit;       /**< number of topologies initialized */
    int              numtrees;    /**< number of alternatives tested */
    pllBoolean          improved;
    } bestlist;

/** @brief  This is used to look up some hard-coded data for each data type 
*   */
typedef struct 
{
  int leftLength;         /**< s^2 */
  int rightLength;/**< s^2 */
  int eignLength;/**<  s */
  int evLength;
  int eiLength;
  int substRatesLength;   /**< (s^2 - s)/2 free model parameters for matrix Q i.e. substitution rates */
  int frequenciesLength;  /**< s frequency of each state */ 
  int tipVectorLength;    /* ??? */
  int symmetryVectorLength;
  int frequencyGroupingLength;

  pllBoolean nonGTR;
  pllBoolean optimizeBaseFrequencies;

  int undetermined;

  const char *inverseMeaning;

  int states;   /* s */

  pllBoolean smoothFrequencies;

  const unsigned  int *bitVector;

} partitionLengths;

typedef struct
{
  int rearrangeType;
  double  likelihood;

  union {
    struct {
      double * zp;
      double * zpn;
      double * zpnn;
      double * zqr;
      nodeptr pn;
      nodeptr pnn;
      nodeptr r;
      nodeptr p;
      nodeptr q;
    } SPR;
    struct {
      nodeptr origin;
      int swapType;
      double z[PLL_NUM_BRANCHES];
    } NNI;
  };
} pllRollbackInfo;


/** @struct pllRearrangeAttr
 
    @brief Structure holding attributes for searching possible tree rearrangements
    
    Holds the attributes for performing tree rearrangements.

    @var pllRearrangeAttr
      The origin node where the search should start

    @var pllRearrangeAttr:mintrav
      The minimum radius around the origin node \a p for which nodes should be tested

    @var pllRearrangeAttr:maxtrav
      The maximum radius around the origin node \a p for which nodes should be tested

    @var pllRearrangeAttr:max
      Maximum number of results to be returned
*/
typedef struct
 {
   nodeptr p;
   int mintrav;
   int maxtrav;
 } pllRearrangeAttr;

/** @typedef pllRearrangeInfo
    
    @brief Tree rearrangement information structure

    Holds information for conducting tree arrangements. This structure
    is the result of a tree arrangement search under given search
    attributes.

    @var pllRearrangeInfo::rearrangeType
      Type of rearrangement. Can be \b PLL_REARRANGE_SPR, \b PLL_REARRANGE_NNI or
      \b PLL_REARRANGE_TBR
    
    @var pllRearrangeInfo::likelihood
      Holds the computed likelihood for the addressed rearrangement

    @var pllRearrangeInfo::SPR::removeNode
      Node where to perform subtree pruning

    @var pllRearrangeInfo::SPR::insertNode
      Node where to place the pruned subtree

    @var pllRearrangeInfo::zqr
      Holds the computed branch lengths after the SPR
*/
typedef struct
 {
   int rearrangeType;
   double  likelihood;
   union {
     struct {
       nodeptr removeNode;
       nodeptr insertNode;
       double  zqr[PLL_NUM_BRANCHES];
     } SPR;
     struct {
       nodeptr originNode;
       int     swapType;
     } NNI;
   };
 } pllRearrangeInfo;


typedef struct
 {
   int max_entries;
   int entries;
   pllRearrangeInfo * rearr;
 } pllRearrangeList;

/** @brief Generic structure for storing a multiple sequence alignment */
typedef struct
 {
   int              sequenceCount;      /**< @brief Number of sequences */
   int              sequenceLength;     /**< @brief Length of sequences */
   int              originalSeqLength;  /**< @brief Original length of sequences (not modified after removing duplicates) */
   char          ** sequenceLabels;     /**< @brief An array of where the \a i-th element is the name of the \a i-th sequence */
   unsigned char ** sequenceData;       /**< @brief The actual sequence data */
   int            * siteWeights;        /**< @brief An array where the \a i-th element indicates how many times site \a i appeared (prior to duplicates removal) in the alignment */
 } pllAlignmentData;


/******************** START OF API FUNCTION DESCRIPTIONS ********************/

#if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
pllBoolean isThisMyPartition(partitionList *pr, int tid, int model);
void printParallelTimePerRegion(void); 
#endif

#ifdef _FINE_GRAIN_MPI
extern void pllFinalizeMPI (void);
#endif



/**
 * @brief Create the main instance of PLL
 *   
 * Create an instance of the phylogenetic likelihood library
 *
 * @param rateHetModel   Rate heterogeneity model
 * @param fastScaling    TODO: explain fastScaling here
 * @param saveMemory     TODO: explain saveMemory here
 * @param useRecom       If set to \b PLL_TRUE, enables ancestral state recomputation
 * 
 * @todo                 Document fastScaling, rate heterogeneity and saveMemory and useRecom
 *
 * @note                 Do not set \a saveMemory to when using \a useRecom as memory saving 
 *                       techniques are not yet implemented for ancestral state recomputation. 
 * 
 * @return               On success returns an instance to PLL, otherwise \b NULL
 */
extern pllInstance * pllCreateInstance (pllInstanceAttr * pInst);

/** 
 *  @ingroup instanceLinkingGroup
 *  @brief Load alignment to the PLL instance
 *   
 *   Loads (copies) the parsed alignment \a alignmentData to the PLL instance
 *   as a deep copy.
 * 
 *    @param tr              The library instance
 *    @param alignmentData   The multiple sequence alignment
 *    @param partitions      List of partitions
 *
 *    @return Returns 1 in case of success, 0 otherwise.
 */
extern int pllLoadAlignment (pllInstance * tr, 
                             pllAlignmentData * alignmentData, 
                             partitionList * pList);

/**
 * @brief Compute the empirical base frequencies for all partitions
 *
 * Compute the empirical base frequencies for all partitions in the list \a pl.
 *
 * @param pl                Partition list
 * @param alignmentData     Multiple sequence alignment
 *
 * @return   A list of \a pl->numberOfPartitions arrays each of size
             \a pl->partitionData[i]->states, where \a i is the \a i-th partition
*/
extern double ** pllBaseFrequenciesAlignment (pllAlignmentData * alignmentData, partitionList * pl);
extern double ** pllBaseFrequenciesInstance (pllInstance * tr, partitionList * pl);

/* pthreads and MPI */
extern void pllStartPthreads (pllInstance *tr, partitionList *pr);
extern void pllStopPthreads (pllInstance * tr);
extern void pllLockMPI (pllInstance * tr);
extern void pllInitMPI(int * argc, char **argv[]);


/* handling branch lengths*/
extern double pllGetBranchLength (pllInstance *tr, nodeptr p, int partition_id);
extern void pllSetBranchLength (pllInstance *tr, nodeptr p, int partition_id, double bl);
extern int pllNniSearch(pllInstance * tr, partitionList *pr, int estimateModel);
extern void pllOptimizeBranchLengths ( pllInstance *tr, partitionList *pr, int maxSmoothIterations );


extern void pllEvaluateLikelihood (pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean fullTraversal, pllBoolean getPerSiteLikelihoods);
extern void pllUpdatePartials (pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean masked);
extern void pllUpdatePartialsAncestral(pllInstance *tr, partitionList *pr, nodeptr p);
extern void pllNewviewIterative(pllInstance *tr, partitionList *pr, int startIndex);
extern void pllEvaluateIterative(pllInstance *tr, partitionList *pr, pllBoolean getPerSiteLikelihoods);

/* newick parser declarations */
extern pllNewickTree * pllNewickParseString (const char * newick);
extern pllNewickTree * pllNewickParseFile (const char * filename);
extern int pllValidateNewick (pllNewickTree *);
extern void pllNewickParseDestroy (pllNewickTree **);
extern int pllNewickUnroot (pllNewickTree * t);
extern char * pllTreeToNewick ( char *treestr, pllInstance *tr, partitionList *pr, nodeptr p,
      pllBoolean printBranchLengths, pllBoolean printNames, pllBoolean printLikelihood,
      pllBoolean rellTree, pllBoolean finalPrint, int perGene,
      pllBoolean branchLabelSupport, pllBoolean printSHSupport);

/* partition parser declarations */
extern void  pllQueuePartitionsDestroy (pllQueue ** partitions);
extern pllQueue * pllPartitionParse (const char * filename);
extern pllQueue * pllPartitionParseString (const char * p);
extern void pllPartitionDump (pllQueue * partitions);
void pllBaseSubstitute (pllInstance * tr, partitionList * partitions);
//void pllBaseSubstitute (pllAlignmentData * tr, partitionList * partitions);
partitionList * pllPartitionsCommit (pllQueue * parts, pllAlignmentData * alignmentData);
int pllPartitionsValidate (pllQueue * parts, pllAlignmentData * alignmentData);
extern void pllAlignmentRemoveDups (pllAlignmentData * alignmentData, partitionList * pl);
void pllPartitionsDestroy (pllInstance *, partitionList **);

/* alignment data declarations */
extern void pllAlignmentDataDestroy (pllAlignmentData *);
extern int pllAlignmentDataDumpFile (pllAlignmentData *, int, const char *);
extern void pllAlignmentDataDumpConsole (pllAlignmentData * alignmentData);
extern pllAlignmentData * pllInitAlignmentData (int, int);
extern pllAlignmentData * pllParseAlignmentFile (int fileType, const char *);
extern pllAlignmentData *pllParsePHYLIPString (const char *rawdata, long filesize);


/* model management */
int pllInitModel (pllInstance *, partitionList *);
void pllInitReversibleGTR(pllInstance * tr, partitionList * pr, int model);
void pllMakeGammaCats(double alpha, double *gammaRates, int K, pllBoolean useMedian);
int pllLinkAlphaParameters(char *string, partitionList *pr);
int pllLinkFrequencies(char *string, partitionList *pr);
int pllLinkRates(char *string, partitionList *pr);
int pllSetSubstitutionRateMatrixSymmetries(char *string, partitionList * pr, int model);
void pllSetFixedAlpha(double alpha, int model, partitionList * pr, pllInstance *tr);
void pllSetFixedBaseFrequencies(double *f, int length, int model, partitionList * pr, pllInstance *tr);
int  pllSetOptimizeBaseFrequencies(int model, partitionList * pr, pllInstance *tr);
void pllSetSubstitutionMatrix(double *q, int length, int model, partitionList * pr,  pllInstance *tr);
void pllSetFixedSubstitutionMatrix(double *q, int length, int model, partitionList * pr,  pllInstance *tr);
int pllGetInstRateMatrix (partitionList * pr, int model, double * outBuffer);
int pllOptimizeModelParameters(pllInstance *tr, partitionList *pr, double likelihoodEpsilon);
double pllGetAlpha (partitionList * pr, int pid);
void pllGetGammaRates (partitionList * pr, int pid, double * outBuffer);
extern void pllGetBaseFrequencies(partitionList * pr, int model, double * outBuffer);
extern void pllGetSubstitutionMatrix (partitionList * pr, int model, double * outBuffer);
void pllEmpiricalFrequenciesDestroy (double *** empiricalFrequencies, int models);
extern void pllOptRatesGeneric(pllInstance *tr, partitionList *pr, double modelEpsilon, linkageList *ll);
extern void pllOptBaseFreqs(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll);
extern void pllOptAlphasGeneric(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll);
extern void pllOptLG4X(pllInstance *tr, partitionList * pr, double modelEpsilon, linkageList *ll, int numberOfModels);

/* tree topology */
void pllTreeInitTopologyNewick (pllInstance *, pllNewickTree *, int);
void pllTreeInitTopologyRandom (pllInstance * tr, int tips, char ** nameList);
void pllTreeInitTopologyForAlignment (pllInstance * tr, pllAlignmentData * alignmentData);
extern void pllMakeRandomTree ( pllInstance *tr);
void pllMakeParsimonyTree(pllInstance *tr);
extern void pllMakeParsimonyTreeFast(pllInstance *tr, partitionList *pr, int sprDist);
void pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInstance * tr, partitionList * partitions, int sprDist);
nodeptr pllGetRandomSubtree(pllInstance *);
extern void pllFreeParsimonyDataStructures(pllInstance *tr, partitionList *pr);
void pllDestroyInstance (pllInstance *);
extern void pllGetAncestralState(pllInstance *tr, partitionList *pr, nodeptr p, double * outProbs, char * outSequence);
unsigned int pllEvaluateParsimony(pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean full, pllBoolean perSiteScores);
void pllInitParsimonyStructures(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores);

/* rearrange functions (NNI and SPR) */
pllRearrangeList * pllCreateRearrangeList (int max);
void pllDestroyRearrangeList (pllRearrangeList ** bestList);
void pllRearrangeSearch (pllInstance * tr, partitionList * pr, int rearrangeType, nodeptr p, int mintrav, int maxtrav, pllRearrangeList * bestList);
void pllRearrangeCommit (pllInstance * tr, partitionList * pr, pllRearrangeInfo * rearr, int saveRollbackInfo);
int pllRearrangeRollback (pllInstance * tr, partitionList * pr);
void pllClearRearrangeHistory (pllInstance * tr);
int pllRaxmlSearchAlgorithm (pllInstance * tr, partitionList * pr, pllBoolean estimateModel);
int pllGetTransitionMatrix (pllInstance * tr, partitionList * pr, nodeptr p, int model, int rate, double * outBuffer);
void pllGetTransitionMatrix2 (pllInstance * tr, partitionList * pr, int model, nodeptr p, double * outBuffer);
int pllGetCLV (pllInstance * tr, partitionList * pr, nodeptr p, int partition, double * outProbs);
extern int pllTopologyPerformNNI(pllInstance * tr, nodeptr p, int swap);

/* hash functions */
unsigned int pllHashString (const char * s, unsigned int size);
int pllHashAdd  (pllHashTable * hTable, unsigned int hash, const char * s, void * item);
pllHashTable * pllHashInit (unsigned int n);
int pllHashSearch (struct pllHashTable * hTable, char * s, void ** item);
void pllHashDestroy (struct pllHashTable ** hTable, void (*cbDealloc)(void *));

/* node specific functions */
nodeptr pllGetOrientedNodePointer (pllInstance * pInst, nodeptr p);

/* other functions */
extern char * pllReadFile (const char *, long *);
extern int * pllssort1main (char ** x, int n);
extern node ** pllGetInnerBranchEndPoints (pllInstance * tr);

/* ---------------- */

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif