File: Timbl_6.3_API.tex

package info (click to toggle)
timbl 6.10-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,088 kB
  • sloc: cpp: 17,211; ansic: 425; sh: 70; makefile: 63
file content (1867 lines) | stat: -rw-r--r-- 63,168 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
% TiMBL 6.3 API

\documentclass{report}
\usepackage{epsf}
\usepackage{a4wide}
\usepackage{palatino}
\usepackage{fullname}
\usepackage{url}

\newcommand{\chisq}{{$ \chi^2 $}}

\author{Ko van der Sloot\\ \ \\ Induction of Linguistic Knowledge\\
        Computational Linguistics\\ Tilburg University \\ \ \\
        P.O. Box 90153, NL-5000 LE, Tilburg, The Netherlands \\ URL:
        http://ilk.uvt.nl}

\title{{\huge TiMBL: Tilburg Memory-Based Learner} \\ \vspace*{0.5cm}
{\bf version 6.3} \\ \vspace*{0.5cm}{\huge API Reference Guide}\\
\vspace*{1cm} {\it ILK Technical Report -- ILK 10-03}}

%better paragraph indentation
\parindent 0pt
\parskip 9pt


\begin{document}

\maketitle

\tableofcontents

\chapter*{Preface}

This is a brief description of the TimblAPI class, the application
programming interface to the Timbl\footnote{\url{http://ilk.uvt.nl/timbl}} software package, and its main
functions. For an introduction into Timbl, consult the Timbl Reference
Guide \cite{Daelemans+10}. Although most of the API can be
traced in the {\tt TimblAPI.h} file, the reverse is not true; some
functions in {\tt TimblAPI.h} are still ``work in progress'' and some others
are artefacts to simplify the implementation of the TiMBL main
program\footnote{Timbl.cxx is therefore {\em not} a good example of
  how to use the API.}.

To learn more about using the API, you should study programs such as
{\tt classify.cxx}, {\tt tse.cxx}, and the examples given in this
manual, which can all be found in the {\tt demos} directory of this
distribution. As you can readily gather from these examples, the basic
thing you need to do to get access to the TimblAPI functions is to
include {\tt TimblAPI.h} in the program, and to include {\tt
  libTimbl.a} in your linking path.

{\bf Important note}: The described functions return a result (mostly
a bool) to indicate success or failure. To simplify the examples, we
ignore these return values. This is, of course, bad practice, to be avoided in
real life programming.\footnote{as stated by commandment 6 of ``The
  Ten Commandments for C Programmers'' by Henry Spencer:

If a function be advertised to return an error code in the event of
difficulties, thou shalt check for that code, yea, even though the
checks triple the size of thy code and produce aches in thy typing
fingers, for if thou thinkest ``it cannot happen to me'', the gods
shall surely punish thee for thy arrogance.}

{\bf Warning}: Although the TiMBL internals perform some sanity
checking, it is quite possible to combine API functions such
that some undetermined state is reached, or even a conflict
arises. The effect of the {\tt SetOptions()} function, for instance,
might be quite surprising. If you have created your own program
with the API it might be wise to test against well-known data to see if
the results make sense.

\chapter{Changes}
\label{changes}

\section{From version 6.2 to 6.3}

No changes to the API were made for this release. This manual has been
brought up to date (preserving the beta-state).
 
\section{From version 6.1 to 6.2}

In version 6.2, some additional functions were added to the API: {\tt
  matchDepth()}, {\tt matchedAtLeaf()}, {\tt WriteMatrices()}, {\tt
  GetMatrices()} and {\tt ShowStatistics()}. These reflect the
additional functionality of Timbl 6.2.  The API is still experimental,
and contains more functions than described in this manual. Using these
`undocumented' features is, as usual, unwise.

\section{From version 5.1 to 6.1}

The major change in 6.0 is the introduction of the {\tt neighborSet}
class, with some special Classify functions.  We added Classify
functions that deliver pointers into Timbl's internal data. This is
fast, but dangerous.  Also, a {\tt WriteInstanceBaseXml()} function is
added, which comes in handy when you want to know more about the
instance base.  Two more examples demonstrating neighborSets and such
are added in Appendix B. From version 6.0 to 6.1, the API has not changed.

\section{From version 5.0 to 5.1}

The API is quite stable at the moment. Most TiMBL changes did not
affect the API. The only real API change is in the {\tt GetWeights()}
function. (see the section on Storing and retrieving intermediate
results).  A few options were added to Timbl, influencing the table in
Appendix A. We have also changed and enhanced the examples in Appendix
B.

\chapter{Quick-start}
\section{Setting up an experiment}

There is just one way to start a TiMBL experiment, which is to call
the TimblAPI constructor:

\begin{footnotesize}
\begin{verbatim}
  TimblAPI( const std::string& args, const std::string& name ="" );
\end{verbatim}
\end{footnotesize}

args is used as a "command line" and is parsed for all kind of options
which are used to create the right kind of experiment with the desired
settings for metric, weighting etc. If something is wrong with the
settings, {\em no}\/ object is created.

The most important option is {\tt -a}  to set the kind of algorithm,
e.g. {\tt -a IB1} to invoke an IB1 experiment or {\tt -a IGTREE} to invoke an IGTREE
experiment. A list of possible options is given in Appendix A.

The optional name can be useful if you have multiple experiments.
In case of warnings or errors, this name is appended to the message.

For example:

\begin{footnotesize}
\begin{verbatim}
  TimblAPI *My_Experiment = new TimblAPI( "-a IGTREE +vDI+DB", 
                                          "test1" );
\end{verbatim}
\end{footnotesize}

{\tt My\_Experiment} is created as an IGTREE experiment with the name
"test1", and the verbosity is set to DI+DB, meaning that the output
will contain DIstance and DistriBution information.

The counterpart to creation is the {\tt \~{ }TimblAPI()} destructor,
which is called when you delete an experiment:

\begin{footnotesize}
\begin{verbatim}
  delete My_Experiment;
\end{verbatim}
\end{footnotesize}

\section{Running an experiment}

Assuming that we have appropriate datafiles (such as the example files {\tt
dimin.train} and {\tt dimin.test} in the TiMBL package), we can get
started right away with the functions {\tt Learn()} and {\tt Test()}.

\subsection{Training}
\begin{footnotesize}
\begin{verbatim}
  bool Learn( const std::string& f );
\end{verbatim}
\end{footnotesize}

This function takes a file with name 'f', and gathers information
such as: number of features, number and frequency of feature values and
the same for class names. After that, these data are used to calculate
a lot of statistical information, which will be used for
testing. Finally, an InstanceBase is created, tuned to the current
algorithm.

\subsection{Testing}
\begin{footnotesize}
\begin{verbatim}
  bool Test( const std::string& in,
             const std::string& out,
             const std::string& perc = "" );
\end{verbatim}
\end{footnotesize}

Test a file given by 'in' and write results to 'out'. If 'perc' is not
empty, then a percentage score is written to file 'perc'.

For example:

\begin{footnotesize}
\begin{verbatim}
  My_Experiment->Learn( "dimin.train" );  
  My_Experiment->Test( "dimin.test", "my_first_test" );  
\end{verbatim}
\end{footnotesize}

An InstanceBase will be created from dimin.train, then dimin.test is
tested against that InstanceBase and output is written to
my\_first\_test.

\subsection{Special cases of {\tt Learn()} and {\tt Test()}}

There are special cases where {\tt Learn()} behaves differently:

\begin{itemize}
\item When the algorithm is IB2, {\tt Learn()} will automatically take
  the first $n$ lines of f (set with the {\tt -b n} option) to
  bootstrap itself, and then the rest of f for IB2-learning. After
  Learning IB2, you can use {\tt Test()} as usual.

\item When the algorithm is CV, {\tt Learn()} is not defined, and all
  work is done in a special version of {\tt Test()}. 'f' is assumed to
  give the name of a file, which, on separate lines, gives the names
  of the files to be cross-validated.

  Also, if {\em featureWeights}\/ or {\em probabilities}\/ are read from 
  user-defined datafiles, a special {\tt CVprepare()} function must be called, 
  to make the weighting, weightFilename and probabilityFileName known to the 
{\tt Test()} function.

See Appendix B for a complete CV example (program {\tt api\_test3}).

%TODO: een voorbeeld met CVPrepare erbij!

\end{itemize}

\section{More about settings}

After an experiment is set up with the TimblAPI constructor, many
options can be changed "on the fly" with:

\begin{footnotesize}
\begin{verbatim}
  bool SetOptions( const std::string& opts );
\end{verbatim}
\end{footnotesize}

Here, `opts' is interpreted as a list of option settings, just like in
the TimblAPI constructor. When an error in the opts string is found,
{\tt SetOptions()} returns false. Whether any options are really set
or changed in that case is undefined. Note that a few options can only
be set {\em once}\/ when creating the experiment, most notably the
algorithm. Any attempt to change these options will result in a
failure.  See Appendix A for all valid options and information about
the possibility to change them within a running experiment.

Note: {\tt SetOptions()} is lazy; changes are cached until the
moment they are really needed, so you can do several {\tt SetOptions()}
calls with even different values for the same option. Only the last
one seen will be used for running the experiment.

To see which options are in effect, you can use the calls {\tt ShowOptions()}
and {\tt ShowSettings()}.

\begin{footnotesize}
\begin{verbatim}
  bool ShowOptions( std::ostream& );
\end{verbatim}
\end{footnotesize}

Shows all options with their possible and current values.

\begin{footnotesize}
\begin{verbatim}
  bool ShowSettings( std::ostream& );
\end{verbatim}
\end{footnotesize}

Shows all options and their current values.

For example:

\begin{footnotesize}
\begin{verbatim}
  My_Experiment->SetOptions( "-w2 -m:M" );
  My_Experiment->SetOptions( "-w3 -v:DB" );
  My_Experiment->ShowSettings( cout )
\end{verbatim}
\end{footnotesize}

See Appendix B (program {\tt api\_test1}) for the output.

\section{Storing and retrieving intermediate results}

To speed up testing, or to manipulate what is happening internally, we
can store and retrieve several important parts of our experiment: The
InstanceBase, the FeatureWeights, the ProbabilityArrays and the ValueDistance Matrices.

Saving is done with:

\begin{footnotesize}
\begin{verbatim}
  bool WriteInstanceBase( const std::string& f );
  bool SaveWeights( const std::string& f );
  bool WriteArrays( const std::string& f );
  bool WriteMatrices( const std::string& f );
\end{verbatim}
\end{footnotesize}

Retrieve with their counterparts:

\begin{footnotesize}
\begin{verbatim}
  bool GetInstanceBase( const std::string& f );
  bool GetWeights( const std::string& f, Weighting w );
  bool GetArrays( const std::string& f );
  bool GetMatrices( const std::string& f );
\end{verbatim}
\end{footnotesize}

All use `f' as a filename for storing/retrieving. {\tt GetWeights} needs
information to decide {\em which}\/ weighting to retrieve.
Weighting is defined as the enumerated type:

\begin{footnotesize}
\begin{verbatim}
  enum Weighting { UNKNOWN_W, UD, NW, GR, IG, X2, SV };
\end{verbatim}
\end{footnotesize}

Some notes:

\begin{enumerate}
\item The InstanceBase is stored in an internal format, with or without
hashing, depending on the {\tt -H} option. The format is described in the
TiMBL manual. Remember that it is a bad idea to edit this file in any way.
\item {\tt GetWeights()} can be used to override the weights that
{\tt Learn()} calculated. {\tt UNKNOWN\_W} should not be used.
\item The Probability arrays are described in the TiMBL manual. They can be
manipulated to tune the MVDM similarity metric.
\end{enumerate}

If you like you may dump the Instancebase in an XML format. No Retrieve
function is available for this format.

\begin{footnotesize}
\begin{verbatim}
  bool WriteInstanceBaseXml( const std::string& f );
\end{verbatim}
\end{footnotesize}

\chapter{Classify functions}

\section{Classify functions: Elementary}
After an experiment is trained with {\tt Learn()}, we do not have to use
{\tt Test()} to do bulk-testing on a file.
We can create our own tests with the {\tt Classify} functions:

\begin{footnotesize}
\begin{verbatim}
  bool Classify( const std::string& Line, std::string& result );
  bool Classify( const std::string& Line, std::string& result, 
                 double& distance );
  bool Classify( const std::string& Line, std::string& result,
                 std::string& Distrib, double& distance );
\end{verbatim}
\end{footnotesize}

Results are stored in 'result' (the assigned class). 'distance' will
get the calculated distance, and 'Distrib' the distribution at
'distance' which is used to calculate 'result'.  Distrib will be a
string like ``\{ NP 2, PP 6 \}''. It is up to you to parse and
interpret this. (In this case: There were 8 classes assigned at
'distance', 2 NP's and 6 PP's, giving a 'result' of ``PP''.)

If you want to perform analyses on these distributions, it might be a
good idea to read the next section about the other range of Classify()
functions.

A main disadvantage compared to using {\tt Test()} is that {\tt
  Test()} is optimized.  {\tt Classify()} has to test for sanity of
its input and also whether a {\tt SetOptions()} has been
performed. This slows down the process.

A good example of the use of {\tt Classify()} is the {\tt
 classify.cxx} program in the TiMBL Distribution.

Depending on the Algorithm and Verbosity setting, it may be possible
to get some extra information on the details of each classification
using:

\begin{footnotesize}
\begin{verbatim}
   const bool ShowBestNeighbors( std::ostream& os, bool distr ) const;
\end{verbatim}
\end{footnotesize}

Provided that the option {\tt +v n} or {\tt +v k} is set and we use
IB1 or IB2, output is produced similar to what we see in the TiMBL
program.  When 'distr' is true, their distributions are also
displayed.  Bear in mind: The {\tt +vn} option is expensive in time
and memory and does not work for IGTREE, TRIBL, and TRIBL2.

Two other functions provide the results as given by the {\tt +vmd} verbosity 
option:

\begin{footnotesize}
\begin{verbatim}
    size_t matchDepth() const;
    bool matchedAtLeaf() const;
\end{verbatim}
\end{footnotesize}

The first returns the matching Depth in the InstanceBase; the second 
flags whether it was a Leaf or a Non-Terminal Node.

\section{Classify functions: Advanced}

A faster, but more dangerous version of Classify is also available.
It is faster because it returns pointers into Timbl's internal
datastructures. It is dangerous because it returns pointers into
Timbl's internal datastructures (using 'const' pointers, so it is
fortunately difficult to really damage Timbl).

\begin{footnotesize}
\begin{verbatim}
  const TargetValue *Classify( const std::string& );
  const TargetValue *Classify( const std::string&, 
                               const ValueDistribution *& );
  const TargetValue *Classify( const std::string&, double& );
  const TargetValue *Classify( const std::string&, 
                               const ValueDistribution *&, 
                               double& );
\end{verbatim}
\end{footnotesize}

A ValueDistribution is a list-like object (but it is not a real list!)
that contains TargetValues objects and weights. It is the result of
combining all nearest neighbors and applying the desired weightings.
Timbl chooses a best TargetValue from this ValueDistribution and the
Classify functions return that as their main result.

{\bf Important}: Because these functions return pointers into Timbl's
internal representation, the results are only valid until the next
Classify function is called (or the experiment is deleted).

Both the TargetValue and ValueDistribution objects have output
operators defined, so you can print them.  TargetValue also has a {\tt
  Name()} function, which returns a std::string so you can collect
results.  ValueDistribution has an iterator-like interface which makes
it possible to walk through the Distribution.

An iterator on a {\tt ValueDistribution *vd} is created like this:
\begin{footnotesize}
\begin{verbatim}
  ValueDistribution::dist_iterator it=vd->begin();
\end{verbatim}
\end{footnotesize}

Unfortunately, the iterator cannot be printed or used directly.
It walks through a map-like structure with pairs of values, of which
only the {\tt second} part is of interest to you.
You may print it, or extract its {\tt Value()} (which happens to be a
TargetValue pointer) or extract its {\tt Weight()}, which is a {\tt double}.

Like this:
\begin{footnotesize}
\begin{verbatim}
  while ( it != vd->end() ){
    cout << it->second << " has a value: ";
    cout << it->second->Value() << " an a weight of "
         << it->second->Weight() << endl;
    ++it;
  }
\end{verbatim}
\end{footnotesize}

Printing {\tt it->second} is the same as printing the
TargetValue plus its Weight.

In the {\em demos}\/ directory you will find a complete example in api\_test6.

{\bf Warning}: it is possible to search the Timbl code for the
internal representation of the TargetValue and ValueDistribution
objects, but please DON'T DO THAT.  The representation might change
between Timbl versions.

\section{Classify functions: neighborSets}

A more flexible way of classifying is to use one of these functions:

\begin{footnotesize}
\begin{verbatim}
  const neighborSet *classifyNS( const std::string& );
  bool classifyNS( const std::string&, neighborSet& );
\end{verbatim}
\end{footnotesize}

The first function will classify an instance and return a pointer to a
{\tt neighborSet} object. This object may be seen as a container
which holds both distances and distributions up to a certain depth,
(which is {\em at least}\/ the number of neighbors (-k option) that
was used for the classifying task.)  It is a const object, so you
cannot directly manipulate its internals, but there are some
functions defined to get useful information out of the neighborSet.

Important:  The neighborSet {\em will be overwritten}\/ on the next
call to any of the classify functions. Be sure to get all the
results out before that happens.

To make life easy, a second variant can be used, which fills a
neighborSet object that you provide (the same could be achieved by a
copy of the result of the first function). 

{\bf Note}: NeighborSets can be large, and copying therefore
expensive, so you should only do this if you really have to.

\subsection{How to get results from a neighborSet}

No metric functions (such as exponential decay and the like) are
performed on the neighborSet. You are free to insert your own metrics, or
use Timbl's built-in metrics.

\begin{footnotesize}
\begin{verbatim}
  double getDistance( size_t n ) const;
  double bestDistance() const;
  const ValueDistribution *getDistribution( size_t n ) const;
  ValueDistribution *bestDistribution( const decayStruct * ds=0,
                                       size_t n=0 ) const ;
\end{verbatim}
\end{footnotesize}

{\tt getDistance( n )} will return the distance of the neighbor(s) at n.
{\tt bestDistance()} is simply {\tt getDistance(0)}.

{\tt getDistribution( n )} will return the distribution of neighbor(s) at
n.

{\tt bestDistribution()} will return the Weighted distribution
calculated using the first n elements in the container and a metric
specified by the {\tt decayStruct}.  The default n=0, means: use the
whole container. An empty decay struct means zeroDecay.

The returned ValueDistribution object is handed to you, and you are
responsible for deleting it after using it (see the previous section
for more details about ValueDistributions).

A decayStruct is one of:

\begin{footnotesize}
\begin{verbatim}
  class zeroDecay();
  class invLinDecay();
  class invDistDecay();
  class expDecay( double alpha );
  class expDecay( double alpha, double beta );
\end{verbatim}
\end{footnotesize}
 
For example, to get a ValueDistribution from a neighborSet {\tt nb}, using
3 neighbors and exponential decay with alpha=0.3, you can do:

\begin{footnotesize}
\begin{verbatim}
  decayStruct *dc = new  expDecay(0.3);
  ValueDistribution *vd = nb->bestDistribution( dc, 3 );
\end{verbatim}
\end{footnotesize}


\subsection{Useful operations on neighborSet objects}

You can print neighborSet objects:

\begin{footnotesize}
\begin{verbatim}
    std::ostream& operator<<( std::ostream&, const neighborSet& );
    std::ostream& operator<<( std::ostream&, const neighborSet * );
\end{verbatim}
\end{footnotesize}

You may create a neighborSet yourself, and assign and delete them:

\begin{footnotesize}
\begin{verbatim}
    neighborSet();
    neighborSet( const neighborSet& );
    neighborSet& operator=( const neighborSet& );
    ~neighborSet();
\end{verbatim}
\end{footnotesize}

If you create a neighborSet, you might want to reserve space for it,
to avoid needless reallocations. Also it can be cleared, and you can
ask the size (just like with normal containers):

\begin{footnotesize}
\begin{verbatim}
    void reserve( size_t );
    void clear();
    size_t size() const;
\end{verbatim}
\end{footnotesize}

Two neighborSets can be merged:

\begin{footnotesize}
\begin{verbatim}
    void merge( const neighborSet& );
\end{verbatim}
\end{footnotesize}

A neighborSet can be truncated at a certain level. This is useful
after merging neighborSets. Merging sets with depth k and n will
result in a set with a depth somewhere within the range $[max(k,n), k+n]$.

\begin{footnotesize}
\begin{verbatim}
    void truncate( size_t );
\end{verbatim}
\end{footnotesize}

\chapter{Advanced Functions}

\section{Modifying the InstanceBase}

The instanceBase can be modified with the functions:

\begin{footnotesize}
\begin{verbatim}
  bool Increment( const std::string& Line ); 
  bool Decrement( const std::string& Line ); 
\end{verbatim}
\end{footnotesize}

These functions add an Instance (as described by Line) to the
InstanceBase, or remove it.  This can only be done for IB1-like
experiments (IB1, IB2, CV and LOO), and enforces a lot of
statistical recalculations.

More sophisticated are:

\begin{footnotesize}
\begin{verbatim}
  bool Expand( const std::string& File  );
  bool Remove( const std::string& File );
\end{verbatim}
\end{footnotesize}

which use the contents of File to do a bulk of Increments or Decrements, and
recalculate afterwards.

\section{Getting more information out of Timbl}

There are a few convenience functions to get extra information on
TiMBL and its behaviour:

\begin{footnotesize}
\begin{verbatim}
  bool WriteNamesFile( const std::string& f );
\end{verbatim}
\end{footnotesize}

Create a file which resembles a C4.5 namesfile.

\begin{footnotesize}
\begin{verbatim}
  Algorithm Algo()
\end{verbatim}
\end{footnotesize}

Give the current algorithm as a type enum Algorithm. First, the
declaration of the Algorithm type:

\begin{footnotesize}
\begin{verbatim}
  enum Algorithm { UNKNOWN_ALG, IB1, IB2, IGTREE, 
                   TRIBL, TRIBL2, LOO, CV };
\end{verbatim}
\end{footnotesize}

This can be printed with the helper function: 

\begin{footnotesize}
\begin{verbatim}
  const std::string to_string( const Algorithm )
\end{verbatim}
\end{footnotesize}

\begin{footnotesize}
\begin{verbatim}
  Weighting CurrentWeighting()
\end{verbatim}
\end{footnotesize}

Gives the current weighting as a type enum Weighting.

Declaration of Weighting:

\begin{footnotesize}
\begin{verbatim}
  enum Weighting { UNKNOWN_W, UD, NW, GR, IG, X2, SV };
\end{verbatim}
\end{footnotesize}

This can be printed with the helper function: 

\begin{footnotesize}
\begin{verbatim}
  const std::string to_string( const Weighting )
\end{verbatim}
\end{footnotesize}


\begin{footnotesize}
\begin{verbatim}
  Weighting CurrentWeightings( std::vector<double>& v )
\end{verbatim}
\end{footnotesize}

Returns the current weighting as a type enum Weighting and also a
vector v with all the current values of this weighting.

\begin{footnotesize}
\begin{verbatim}
  std::string& ExpName()
\end{verbatim}
\end{footnotesize}

Returns the value of 'name' given at the construction of the experiment

\begin{footnotesize}
\begin{verbatim}
  static std::string VersionInfo( bool full = false )
\end{verbatim}
\end{footnotesize}

Returns a string containing the Version number, the Revision and the
Revision string of the current API implementation. If full is true,
also information about the date and time of compilation is included.

\chapter{Server mode}
\label{Using TiMBL as a Server}

\begin{footnotesize}
\begin{verbatim}
  bool StartServer( const int port, const int max_c );
\end{verbatim}
\end{footnotesize}

Starts a TimblServer on 'port' with maximally 'max\_c' concurrent
connections to it. Starting a server makes sense only after the
experiment is trained.

\clearpage
\chapter{Annotated example programs}

\subsection{example 1, {\tt api\_test1.cxx}}
\begin{footnotesize}
\begin{verbatim}	
#include "TimblAPI.h"
int main(){
  TimblAPI My_Experiment( "-a IGTREE +vDI+DB+F", "test1" );
  My_Experiment.SetOptions( "-w3 -vDB" );
  My_Experiment.ShowSettings( std::cout );
  My_Experiment.Learn( "dimin.train" );  
  My_Experiment.Test( "dimin.test", "my_first_test.out" );  
  My_Experiment.SetOptions( "-mM" );
  My_Experiment.Test( "dimin.test", "my_first_test.out" );  
}
\end{verbatim}
\end{footnotesize}


Output:
\begin{footnotesize}
\begin{verbatim}
Current Experiment Settings :
FLENGTH              : 0
MAXBESTS             : 500
TRIBL_OFFSET         : 0
INPUTFORMAT          : Unknown
TREE_ORDER           : Unknown
ALL_WEIGHTS          : false
WEIGHTING            : x2                               [Note 1]
BIN_SIZE             : 20
IB2_OFFSET           : 0
KEEP_DISTRIBUTIONS   : false
DO_SLOPPY_LOO        : false
TARGET_POS           : 18446744073709551615
DO_SILLY             : false
DO_DIVERSIFY         : false
DECAY                : Z
SEED                 : -1
BEAM_SIZE            : 0
DECAYPARAM_A         : 1.00000
DECAYPARAM_B         : 1.00000
NORMALISATION        : None
NORMFACTOR           : 1.00000
EXEMPLAR_WEIGHTS     : false
IGNORE_EXEMPLAR_WEIGHTS : true
NO_EXEMPLAR_WEIGHTS_TEST : true
VERBOSITY            : F+DI                             [Note 2]
EXACT_MATCH          : false
HASHED_TREE          : true
GLOBAL_METRIC        : O
METRICS              : 
MVD_LIMIT            : 1
NEIGHBORS            : 1
PROGRESS             : 100000
CLIP_FACTOR          : 10

Examine datafile 'dimin.train' gave the following results:
Number of Features: 12
InputFormat       : C4.5

-test1-Phase 1: Reading Datafile: dimin.train
-test1-Start:          0 @ Mon May 31 11:03:34 2010
-test1-Finished:    2999 @ Mon May 31 11:03:34 2010
-test1-Calculating Entropy         Mon May 31 11:03:34 2010
Lines of data     : 2999
DB Entropy        : 1.6178929
Number of Classes : 5

Feats   Vals    X-square        Variance        InfoGain        GainRatio
    1      3    128.41828       0.021410184     0.030971064     0.024891536
    2     50    364.75812       0.030406645     0.060860038     0.027552191
    3     19    212.29804       0.017697402     0.039562857     0.018676787
    4     37    449.83823       0.037499019     0.052541227     0.052620750
    5      3    288.87218       0.048161417     0.074523225     0.047699231
    6     61    415.64113       0.034648310     0.10604433      0.024471911
    7     20    501.33465       0.041791818     0.12348668      0.034953203
    8     69    367.66021       0.030648567     0.097198760     0.043983864
    9      2    169.36962       0.056475363     0.045752381     0.046816705
   10     64    914.61906       0.076243669     0.21388759      0.042844587
   11     18    2807.0418       0.23399815      0.66970458      0.18507018
   12     43    7160.3682       0.59689631      1.2780762       0.32537181

Feature Permutation based on Chi-Squared :
< 12, 11, 10, 7, 4, 6, 8, 2, 5, 3, 9, 1 >
-test1-Phase 2: Building index on Datafile: dimin.train
-test1-Start:          0 @ Mon May 31 11:03:34 2010
-test1-Finished:    2999 @ Mon May 31 11:03:34 2010
-test1-
Phase 3: Learning from Datafile: dimin.train
-test1-Start:          0 @ Mon May 31 11:03:34 2010
-test1-Finished:    2999 @ Mon May 31 11:03:34 2010

Size of InstanceBase = 148 Nodes, (5920 bytes), 99.61 % compression
Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5


Starting to test, Testfile: dimin.test
Writing output in:          my_first_test.out
Algorithm     : IGTree
Weighting     : Chi-square
Feature 1        : 128.418283576224439
Feature 2        : 364.758115277811896
Feature 3        : 212.298037236345095
Feature 4        : 449.838231470681876
Feature 5        : 288.872176256387263
Feature 6        : 415.641126446691771
Feature 7        : 501.334653478280984
Feature 8        : 367.660212489714240
Feature 9        : 169.369615106487458
Feature 10       : 914.619058199288816
Feature 11       : 2807.041753278295346
Feature 12       : 7160.368151902808677

-test1-Tested:      1 @ Mon May 31 11:03:34 2010
-test1-Tested:      2 @ Mon May 31 11:03:34 2010
-test1-Tested:      3 @ Mon May 31 11:03:34 2010
-test1-Tested:      4 @ Mon May 31 11:03:34 2010
-test1-Tested:      5 @ Mon May 31 11:03:34 2010
-test1-Tested:      6 @ Mon May 31 11:03:34 2010
-test1-Tested:      7 @ Mon May 31 11:03:34 2010
-test1-Tested:      8 @ Mon May 31 11:03:34 2010
-test1-Tested:      9 @ Mon May 31 11:03:34 2010
-test1-Tested:     10 @ Mon May 31 11:03:34 2010
-test1-Tested:    100 @ Mon May 31 11:03:34 2010
-test1-Ready:     950 @ Mon May 31 11:03:34 2010
Seconds taken: 0.1331 (7135.13 p/s)

overall accuracy:        0.962105  (914/950)
Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5

Warning:-test1-Metric must be Overlap for IGTree test.     [Note 3]

\end{verbatim}
\end{footnotesize}


Notes:
\begin{enumerate}
\item The {\tt -w3} of the {\tt SetOptions()} call overrules the default
  weighting (GainRatio), resulting in a
  weighting of 3, or Chi-Square. 
\item The constructor sets the verbosity with {\tt +vDI+DB+F}.
The {\tt SetOptions()} call, however, lowers the verbosity with {\tt -vDB}, and the resulting verbosity is therefore {\tt F+DI}.
\item Due to the second {\tt SetOptions()}, the default metric is set to
MVDM --- this is however not applicable to IGTREE. This raises a warning
when we start to test.
\end{enumerate}

Result in my\_first\_test.out (first 20 lines):
\begin{footnotesize}
\begin{verbatim}
=,=,=,=,=,=,=,=,+,p,e,=,T,T        6619.8512628162
=,=,=,=,+,k,u,=,-,bl,u,m,E,P        2396.8557978603
+,m,I,=,-,d,A,G,-,d,},t,J,J        6619.8512628162
-,t,@,=,-,l,|,=,-,G,@,n,T,T        6619.8512628162
-,=,I,n,-,str,y,=,+,m,E,nt,J,J        6619.8512628162
=,=,=,=,=,=,=,=,+,br,L,t,J,J        6619.8512628162
=,=,=,=,+,zw,A,=,-,m,@,r,T,T        6619.8512628162
=,=,=,=,-,f,u,=,+,dr,a,l,T,T        6619.8512628162
=,=,=,=,=,=,=,=,+,l,e,w,T,T        13780.219414719
=,=,=,=,+,tr,K,N,-,k,a,rt,J,J        6619.8512628162
=,=,=,=,+,=,o,=,-,p,u,=,T,T        3812.8095095379
=,=,=,=,=,=,=,=,+,l,A,m,E,E        3812.8095095379
=,=,=,=,=,=,=,=,+,l,A,p,J,J        6619.8512628162
=,=,=,=,=,=,=,=,+,sx,E,lm,P,P        6619.8512628162
+,l,a,=,-,d,@,=,-,k,A,st,J,J        6619.8512628162
-,s,i,=,-,f,E,r,-,st,O,k,J,J        6619.8512628162
=,=,=,=,=,=,=,=,+,sp,a,n,T,T        6619.8512628162
=,=,=,=,=,=,=,=,+,st,o,t,J,J        6619.8512628162
=,=,=,=,+,sp,a,r,-,b,u,k,J,J        6619.8512628162
+,h,I,N,-,k,@,l,-,bl,O,k,J,J        6619.8512628162
\end{verbatim}
\end{footnotesize}
\clearpage

\subsection{example 2, {\tt api\_test2.cxx}}

This demonstrates IB2 learning. Our example program:

\begin{footnotesize}
\begin{verbatim}
#include "TimblAPI.h"
int main(){
  TimblAPI *My_Experiment = new TimblAPI( "-a IB2 +vF+DI+DB" , 
                                          "test2" );
  My_Experiment->SetOptions( "-b100" );
  My_Experiment->ShowSettings( std::cout );
  My_Experiment->Learn( "dimin.train" );  
  My_Experiment->Test( "dimin.test", "my_second_test.out" );
  delete My_Experiment;
  exit(1);
}
\end{verbatim}
\end{footnotesize}

We create an experiment for the IB2 algorithm, with the {\tt -b} option set
to 100, so the first 100 lines of {\tt dimin.train} will be used to
bootstrap the learning, as we can see from the output:

\begin{footnotesize}
\begin{verbatim}
Current Experiment Settings :
FLENGTH              : 0
MAXBESTS             : 500
TRIBL_OFFSET         : 0
INPUTFORMAT          : Unknown
TREE_ORDER           : G/V
ALL_WEIGHTS          : false
WEIGHTING            : gr
BIN_SIZE             : 20
IB2_OFFSET           : 100
KEEP_DISTRIBUTIONS   : false
DO_SLOPPY_LOO        : false
TARGET_POS           : 4294967295
DO_SILLY             : false
DO_DIVERSIFY         : false
DECAY                : Z
SEED                 : -1
BEAM_SIZE            : 0
DECAYPARAM_A         : 1.00000
DECAYPARAM_B         : 1.00000
NORMALISATION        : None
NORM_FACTOR          : 1.00000
EXEMPLAR_WEIGHTS     : false
IGNORE_EXEMPLAR_WEIGHTS : true
NO_EXEMPLAR_WEIGHTS_TEST : true
VERBOSITY            : F+DI+DB
EXACT_MATCH          : false
HASHED_TREE          : true
GLOBAL_METRIC        : O
METRICS              :
MVD_LIMIT            : 1
NEIGHBORS            : 1
PROGRESS             : 100000
CLIP_FACTOR          : 10

Examine datafile 'dimin.train' gave the following results:
Number of Features: 12
InputFormat       : C4.5

-test2-Phase 1: Reading Datafile: dimin.train
-test2-Start:          0 @ Mon May 31 11:03:34 2010
-test2-Finished:    2999 @ Mon May 31 11:03:34 2010
-test2-Calculating Entropy         Mon May 31 11:03:34 2010
Lines of data     : 2999                                  [Note 1]
DB Entropy        : 1.6178929
Number of Classes : 5

Feats	Vals	InfoGain	GainRatio
    1      3	0.030971064	0.024891536
    2     50	0.060860038	0.027552191
    3     19	0.039562857	0.018676787
    4     37	0.052541227	0.052620750
    5      3	0.074523225	0.047699231
    6     61	0.10604433	0.024471911
    7     20	0.12348668	0.034953203
    8     69	0.097198760	0.043983864
    9      2	0.045752381	0.046816705
   10     64	0.21388759	0.042844587
   11     18	0.66970458	0.18507018
   12     43	1.2780762	0.32537181

Feature Permutation based on GainRatio/Values :
< 9, 5, 11, 1, 12, 7, 4, 3, 10, 8, 2, 6 >
-test2-Phase 2: Learning from Datafile: dimin.train
-test2-Start:          0 @ Mon May 31 11:03:34 2010
-test2-Finished:     100 @ Mon May 31 11:03:34 2010

Size of InstanceBase = 954 Nodes, (38160 bytes), 26.62 % compression
-test2-Phase 2: Appending from Datafile: dimin.train (starting at line 101)
-test2-Start:        101 @ Mon May 31 11:03:34 2010
-test2-Learning:     101 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     102 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     103 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     104 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     105 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     106 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     107 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     108 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     109 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     110 @ Mon May 31 11:03:34 2010	 added:0
-test2-Learning:     200 @ Mon May 31 11:03:34 2010	 added:9
-test2-Learning:    1100 @ Mon May 31 11:03:34 2010	 added:66
-test2-Finished:    2999 @ Mon May 31 11:03:35 2010

in total added 173 new entries                                      [Note 2]

Size of InstanceBase = 2232 Nodes, (89280 bytes), 32.40 % compression
DB Entropy        : 1.61789286
Number of Classes : 5

Feats	Vals	InfoGain	GainRatio
    1      3	0.03097106	0.02489154
    2     50	0.06086004	0.02755219
    3     19	0.03956286	0.01867679
    4     37	0.05254123	0.05262075
    5      3	0.07452322	0.04769923
    6     61	0.10604433	0.02447191
    7     20	0.12348668	0.03495320
    8     69	0.09719876	0.04398386
    9      2	0.04575238	0.04681670
   10     64	0.21388759	0.04284459
   11     18	0.66970458	0.18507018
   12     43	1.27807625	0.32537181

Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5


Starting to test, Testfile: dimin.test
Writing output in:          my_second_test.out
Algorithm     : IB2
Global metric : Overlap
Deviant Feature Metrics:(none)
Weighting     : GainRatio
Feature 1	 : 0.026241147173103
Feature 2	 : 0.030918769841214
Feature 3	 : 0.021445836516602
Feature 4	 : 0.056561885447060
Feature 5	 : 0.048311436541460
Feature 6	 : 0.027043360641622
Feature 7	 : 0.037453180788027
Feature 8	 : 0.044999091421718
Feature 9	 : 0.048992032381874
Feature 10	 : 0.044544230779268
Feature 11	 : 0.185449683494634
Feature 12	 : 0.324719540921155

-test2-Tested:      1 @ Mon May 31 11:03:35 2010
-test2-Tested:      2 @ Mon May 31 11:03:35 2010
-test2-Tested:      3 @ Mon May 31 11:03:35 2010
-test2-Tested:      4 @ Mon May 31 11:03:35 2010
-test2-Tested:      5 @ Mon May 31 11:03:35 2010
-test2-Tested:      6 @ Mon May 31 11:03:35 2010
-test2-Tested:      7 @ Mon May 31 11:03:35 2010
-test2-Tested:      8 @ Mon May 31 11:03:35 2010
-test2-Tested:      9 @ Mon May 31 11:03:35 2010
-test2-Tested:     10 @ Mon May 31 11:03:35 2010
-test2-Tested:    100 @ Mon May 31 11:03:35 2010
-test2-Ready:     950 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0456 (20826.48 p/s)

overall accuracy:        0.941053  (894/950), of which 15 exact matches 
                                                         [Note 3]
There were 43 ties of which 32 (74.42%) were correctly resolved
\end{verbatim}
\end{footnotesize}


Notes:
\begin{enumerate}
\item IB2 is bootstrapped with 100 lines, but for the statistics all 2999
 lines are used.
\item As we see here, 173 entries from the input file had a mismatch,
and were therefore entered in the InstanceBase.
\item We see that IB2 scores 94.11 \%, compared to 96.21 \% for IGTREE
  in our first example.  For this data, IB2 is not a good
  algorithm. However, it saves a lot of space, and is faster than
  IB1. Yet, IGTREE is both faster and better. Had we used IB1, the
  score would have been 96.84 \%.
\end{enumerate}
\clearpage

\subsection{example 3, {\tt api\_test3.cxx}}

This demonstrates Cross Validation. Let's try the following program:

\begin{footnotesize}
\begin{verbatim}
#include "TimblAPI.h"
using Timbl::TimblAPI;

int main(){
  TimblAPI *My_Experiment = new TimblAPI( "-t cross_validate" );
  My_Experiment->Test( "cross_val.test" );  
  delete My_Experiment;
  exit(0);
}
\end{verbatim}
\end{footnotesize}

This program creates an experiment which defaults to the IB1 algorithm
and which, because of the special option ``-t cross\_validate'', will run a
CrossValidation experiment.\\
Learn() is not possible now. We must use a special form of Test().

``cross\_val.test'' is a file with the following content:
\begin{footnotesize}
\begin{verbatim}
small_1.train
small_2.train
small_3.train
small_4.train
small_5.train
\end{verbatim}
\end{footnotesize}


All these files contain an equal part of a bigger dataset, and
My\_Experiment will run a CrossValidation test between these files.
Note that output filenames are generated and that you cannot influence
that.

The output of this program is:

\begin{footnotesize}
\begin{verbatim}
Starting Cross validation test on files:
small_1.train
small_2.train
small_3.train
small_4.train
small_5.train
Examine datafile 'small_1.train' gave the following results:
Number of Features: 8
InputFormat       : C4.5


Starting to test, Testfile: small_1.train
Writing output in:          small_1.train.cv
Algorithm     : CV
Global metric : Overlap
Deviant Feature Metrics:(none)
Weighting     : GainRatio

Tested:      1 @ Mon May 31 11:03:35 2010
Tested:      2 @ Mon May 31 11:03:35 2010
Tested:      3 @ Mon May 31 11:03:35 2010
Tested:      4 @ Mon May 31 11:03:35 2010
Tested:      5 @ Mon May 31 11:03:35 2010
Tested:      6 @ Mon May 31 11:03:35 2010
Tested:      7 @ Mon May 31 11:03:35 2010
Tested:      8 @ Mon May 31 11:03:35 2010
Tested:      9 @ Mon May 31 11:03:35 2010
Tested:     10 @ Mon May 31 11:03:35 2010
Ready:      10 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0006 (16207.46 p/s)

overall accuracy:        0.800000  (8/10)
Examine datafile 'small_2.train' gave the following results:
Number of Features: 8
InputFormat       : C4.5


Starting to test, Testfile: small_2.train
Writing output in:          small_2.train.cv
Algorithm     : CV
Global metric : Overlap
Deviant Feature Metrics:(none)
Weighting     : GainRatio

Tested:      1 @ Mon May 31 11:03:35 2010
Tested:      2 @ Mon May 31 11:03:35 2010
Tested:      3 @ Mon May 31 11:03:35 2010
Tested:      4 @ Mon May 31 11:03:35 2010
Tested:      5 @ Mon May 31 11:03:35 2010
Tested:      6 @ Mon May 31 11:03:35 2010
Tested:      7 @ Mon May 31 11:03:35 2010
Tested:      8 @ Mon May 31 11:03:35 2010
Tested:      9 @ Mon May 31 11:03:35 2010
Tested:     10 @ Mon May 31 11:03:35 2010
Ready:      10 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0005 (19646.37 p/s)

overall accuracy:        0.800000  (8/10)
Examine datafile 'small_3.train' gave the following results:
Number of Features: 8
InputFormat       : C4.5


Starting to test, Testfile: small_3.train
Writing output in:          small_3.train.cv
Algorithm     : CV
Global metric : Overlap
Deviant Feature Metrics:(none)
Weighting     : GainRatio

Tested:      1 @ Mon May 31 11:03:35 2010
Tested:      2 @ Mon May 31 11:03:35 2010
Tested:      3 @ Mon May 31 11:03:35 2010
Tested:      4 @ Mon May 31 11:03:35 2010
Tested:      5 @ Mon May 31 11:03:35 2010
Tested:      6 @ Mon May 31 11:03:35 2010
Tested:      7 @ Mon May 31 11:03:35 2010
Tested:      8 @ Mon May 31 11:03:35 2010
Tested:      9 @ Mon May 31 11:03:35 2010
Tested:     10 @ Mon May 31 11:03:35 2010
Ready:      10 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0005 (20202.02 p/s)

overall accuracy:        0.900000  (9/10)
Examine datafile 'small_4.train' gave the following results:
Number of Features: 8
InputFormat       : C4.5


Starting to test, Testfile: small_4.train
Writing output in:          small_4.train.cv
Algorithm     : CV
Global metric : Overlap
Deviant Feature Metrics:(none)
Weighting     : GainRatio

Tested:      1 @ Mon May 31 11:03:35 2010
Tested:      2 @ Mon May 31 11:03:35 2010
Tested:      3 @ Mon May 31 11:03:35 2010
Tested:      4 @ Mon May 31 11:03:35 2010
Tested:      5 @ Mon May 31 11:03:35 2010
Tested:      6 @ Mon May 31 11:03:35 2010
Tested:      7 @ Mon May 31 11:03:35 2010
Tested:      8 @ Mon May 31 11:03:35 2010
Tested:      9 @ Mon May 31 11:03:35 2010
Tested:     10 @ Mon May 31 11:03:35 2010
Ready:      10 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0005 (19880.72 p/s)

overall accuracy:        0.800000  (8/10)
Examine datafile 'small_5.train' gave the following results:
Number of Features: 8
InputFormat       : C4.5


Starting to test, Testfile: small_5.train
Writing output in:          small_5.train.cv
Algorithm     : CV
Global metric : Overlap
Deviant Feature Metrics:(none)
Weighting     : GainRatio

Tested:      1 @ Mon May 31 11:03:35 2010
Tested:      2 @ Mon May 31 11:03:35 2010
Tested:      3 @ Mon May 31 11:03:35 2010
Tested:      4 @ Mon May 31 11:03:35 2010
Tested:      5 @ Mon May 31 11:03:35 2010
Tested:      6 @ Mon May 31 11:03:35 2010
Tested:      7 @ Mon May 31 11:03:35 2010
Tested:      8 @ Mon May 31 11:03:35 2010
Ready:       8 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0004 (19093.08 p/s)

overall accuracy:        1.000000  (8/8)
\end{verbatim}
\end{footnotesize}


What has happened here?

\begin{enumerate}
\item TiMBL trained itself with inputfiles small\_2.train through
small\_5.train (in fact using the {\tt Expand()} API call).
\item Then TiMBL tested small\_1.train against the InstanceBase.
\item Next, small\_2.train is removed from the database (API call {\tt
Remove()} ) and small\_1.train is added.
\item Then small\_2.train is tested against the InstanceBase.
\item And so forth with small\_3.train $\ldots$
\end{enumerate}
\clearpage

\subsection{example 4, {\tt api\_test4.cxx}}

This program demonstrates adding and deleting of the InstanceBase.  It
also proves that weights are (re)calculated correctly each time (which
also explains why this is a time-consuming thing to do). After running
this program, wg.1.wgt should be equal to wg.5.wgt and wg.2.wgt equal to
wg.4.wgt . Important to note is also, that while we do not use a weighting
of X2 or SV here, only the ``simple'' weights are calculated and
stored.

Further, arr.1.arr should be equal to arr.5.arr and arr.2.arr should be equal
to arr.4.arr

First the program: 

\begin{footnotesize}
\begin{verbatim}
#include <iostream>
#include "TimblAPI.h"

int main(){
  TimblAPI *My_Experiment = new TimblAPI( "-a IB1 +vDI+DB +mM" , 
                                          "test4" );
  My_Experiment->ShowSettings( std::cout );
  My_Experiment->Learn( "dimin.train" );  
  My_Experiment->Test( "dimin.test", "inc1.out" );
  My_Experiment->SaveWeights( "wg.1.wgt" );  
  My_Experiment->WriteArrays( "arr.1.arr" );  
  My_Experiment->Increment( "=,=,=,=,+,k,e,=,-,r,@,l,T" );  
  My_Experiment->Test( "dimin.test", "inc2.out" );
  My_Experiment->SaveWeights( "wg.2.wgt" );  
  My_Experiment->WriteArrays( "arr.2.arr" );  
  My_Experiment->Increment( "+,zw,A,rt,-,k,O,p,-,n,O,n,E" );  
  My_Experiment->Test( "dimin.test", "inc3.out" );
  My_Experiment->SaveWeights( "wg.3.wgt" );  
  My_Experiment->WriteArrays( "arr.3.arr" );  
  My_Experiment->Decrement( "+,zw,A,rt,-,k,O,p,-,n,O,n,E" );  
  My_Experiment->Test( "dimin.test", "inc4.out" );
  My_Experiment->SaveWeights( "wg.4.wgt" );  
  My_Experiment->WriteArrays( "arr.4.arr" );  
  My_Experiment->Decrement( "=,=,=,=,+,k,e,=,-,r,@,l,T" );  
  My_Experiment->Test( "dimin.test", "inc5.out" );
  My_Experiment->SaveWeights( "wg.5.wgt" );  
  My_Experiment->WriteArrays( "arr.5.arr" );  
  delete My_Experiment;
  exit(1);
}
\end{verbatim}
\end{footnotesize}


This produces the following output:

\begin{footnotesize}
\begin{verbatim}
Current Experiment Settings :
FLENGTH              : 0
MAXBESTS             : 500
TRIBL_OFFSET         : 0
IG_THRESHOLD         : 1000
INPUTFORMAT          : Unknown
TREE_ORDER           : G/V
ALL_WEIGHTS          : false
WEIGHTING            : gr
BIN_SIZE             : 20
IB2_OFFSET           : 0
KEEP_DISTRIBUTIONS   : false
DO_SLOPPY_LOO        : false
TARGET_POS           : 18446744073709551615
DO_SILLY             : false
DO_DIVERSIFY         : false
DECAY                : Z
SEED                 : -1
BEAM_SIZE            : 0
DECAYPARAM_A         : 1.00000
DECAYPARAM_B         : 1.00000
NORMALISATION        : None
NORM_FACTOR          : 1.00000
EXEMPLAR_WEIGHTS     : false
IGNORE_EXEMPLAR_WEIGHTS : true
NO_EXEMPLAR_WEIGHTS_TEST : true
VERBOSITY            : DI+DB
EXACT_MATCH          : false
HASHED_TREE          : true
GLOBAL_METRIC        : M
METRICS              : 
MVD_LIMIT            : 1
NEIGHBORS            : 1
PROGRESS             : 100000
CLIP_FACTOR          : 10

Examine datafile 'dimin.train' gave the following results:
Number of Features: 12
InputFormat       : C4.5

-test4-Phase 1: Reading Datafile: dimin.train
-test4-Start:          0 @ Mon May 31 11:03:35 2010
-test4-Finished:    2999 @ Mon May 31 11:03:35 2010
-test4-Calculating Entropy         Mon May 31 11:03:35 2010
Feature Permutation based on GainRatio/Values :
< 9, 5, 11, 1, 12, 7, 4, 3, 10, 8, 2, 6 >
-test4-Phase 2: Learning from Datafile: dimin.train
-test4-Start:          0 @ Mon May 31 11:03:35 2010
-test4-Finished:    2999 @ Mon May 31 11:03:35 2010

Size of InstanceBase = 19231 Nodes, (769240 bytes), 49.77 % compression
Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5


Starting to test, Testfile: dimin.test
Writing output in:          inc1.out
Algorithm     : IB1
Global metric : Value Difference, Prestored matrix
Deviant Feature Metrics:(none)
Size of value-matrix[1] = 168 Bytes 
Size of value-matrix[2] = 968 Bytes 
Size of value-matrix[3] = 968 Bytes 
Size of value-matrix[4] = 168 Bytes 
Size of value-matrix[5] = 168 Bytes 
Size of value-matrix[6] = 1904 Bytes 
Size of value-matrix[7] = 1904 Bytes 
Size of value-matrix[8] = 504 Bytes 
Size of value-matrix[9] = 104 Bytes 
Size of value-matrix[10] = 2904 Bytes 
Size of value-matrix[11] = 1728 Bytes 
Size of value-matrix[12] = 1248 Bytes 
Total Size of value-matrices 12736 Bytes 

Weighting     : GainRatio

-test4-Tested:      1 @ Mon May 31 11:03:35 2010
-test4-Tested:      2 @ Mon May 31 11:03:35 2010
-test4-Tested:      3 @ Mon May 31 11:03:35 2010
-test4-Tested:      4 @ Mon May 31 11:03:35 2010
-test4-Tested:      5 @ Mon May 31 11:03:35 2010
-test4-Tested:      6 @ Mon May 31 11:03:35 2010
-test4-Tested:      7 @ Mon May 31 11:03:35 2010
-test4-Tested:      8 @ Mon May 31 11:03:35 2010
-test4-Tested:      9 @ Mon May 31 11:03:35 2010
-test4-Tested:     10 @ Mon May 31 11:03:35 2010
-test4-Tested:    100 @ Mon May 31 11:03:35 2010
-test4-Ready:     950 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0791 (12003.74 p/s)

overall accuracy:        0.964211  (916/950), of which 62 exact matches 
There were 6 ties of which 6 (100.00%) were correctly resolved
-test4-Saving Weights in wg.1.wgt
-test4-Saving Probability Arrays in arr.1.arr
Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5


Starting to test, Testfile: dimin.test
Writing output in:          inc2.out
Algorithm     : IB1
Global metric : Value Difference, Prestored matrix
Deviant Feature Metrics:(none)
Size of value-matrix[1] = 168 Bytes 
Size of value-matrix[2] = 968 Bytes 
Size of value-matrix[3] = 968 Bytes 
Size of value-matrix[4] = 168 Bytes 
Size of value-matrix[5] = 168 Bytes 
Size of value-matrix[6] = 1904 Bytes 
Size of value-matrix[7] = 1904 Bytes 
Size of value-matrix[8] = 504 Bytes 
Size of value-matrix[9] = 104 Bytes 
Size of value-matrix[10] = 2904 Bytes 
Size of value-matrix[11] = 1728 Bytes 
Size of value-matrix[12] = 1248 Bytes 
Total Size of value-matrices 12736 Bytes 

Weighting     : GainRatio

-test4-Tested:      1 @ Mon May 31 11:03:35 2010
-test4-Tested:      2 @ Mon May 31 11:03:35 2010
-test4-Tested:      3 @ Mon May 31 11:03:35 2010
-test4-Tested:      4 @ Mon May 31 11:03:35 2010
-test4-Tested:      5 @ Mon May 31 11:03:35 2010
-test4-Tested:      6 @ Mon May 31 11:03:35 2010
-test4-Tested:      7 @ Mon May 31 11:03:35 2010
-test4-Tested:      8 @ Mon May 31 11:03:35 2010
-test4-Tested:      9 @ Mon May 31 11:03:35 2010
-test4-Tested:     10 @ Mon May 31 11:03:35 2010
-test4-Tested:    100 @ Mon May 31 11:03:35 2010
-test4-Ready:     950 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0866 (10965.92 p/s)

overall accuracy:        0.964211  (916/950), of which 62 exact matches 
There were 6 ties of which 6 (100.00%) were correctly resolved
-test4-Saving Weights in wg.2.wgt
-test4-Saving Probability Arrays in arr.2.arr
Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5


Starting to test, Testfile: dimin.test
Writing output in:          inc3.out
Algorithm     : IB1
Global metric : Value Difference, Prestored matrix
Deviant Feature Metrics:(none)
Size of value-matrix[1] = 168 Bytes 
Size of value-matrix[2] = 968 Bytes 
Size of value-matrix[3] = 968 Bytes 
Size of value-matrix[4] = 168 Bytes 
Size of value-matrix[5] = 168 Bytes 
Size of value-matrix[6] = 1904 Bytes 
Size of value-matrix[7] = 1904 Bytes 
Size of value-matrix[8] = 504 Bytes 
Size of value-matrix[9] = 104 Bytes 
Size of value-matrix[10] = 2904 Bytes 
Size of value-matrix[11] = 1728 Bytes 
Size of value-matrix[12] = 1248 Bytes 
Total Size of value-matrices 12736 Bytes 

Weighting     : GainRatio

-test4-Tested:      1 @ Mon May 31 11:03:35 2010
-test4-Tested:      2 @ Mon May 31 11:03:35 2010
-test4-Tested:      3 @ Mon May 31 11:03:35 2010
-test4-Tested:      4 @ Mon May 31 11:03:35 2010
-test4-Tested:      5 @ Mon May 31 11:03:35 2010
-test4-Tested:      6 @ Mon May 31 11:03:35 2010
-test4-Tested:      7 @ Mon May 31 11:03:35 2010
-test4-Tested:      8 @ Mon May 31 11:03:35 2010
-test4-Tested:      9 @ Mon May 31 11:03:35 2010
-test4-Tested:     10 @ Mon May 31 11:03:35 2010
-test4-Tested:    100 @ Mon May 31 11:03:35 2010
-test4-Ready:     950 @ Mon May 31 11:03:35 2010
Seconds taken: 0.0740 (12844.09 p/s)

overall accuracy:        0.964211  (916/950), of which 62 exact matches 
There were 6 ties of which 6 (100.00%) were correctly resolved
-test4-Saving Weights in wg.3.wgt
-test4-Saving Probability Arrays in arr.3.arr
Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5


Starting to test, Testfile: dimin.test
Writing output in:          inc4.out
Algorithm     : IB1
Global metric : Value Difference, Prestored matrix
Deviant Feature Metrics:(none)
Size of value-matrix[1] = 168 Bytes 
Size of value-matrix[2] = 968 Bytes 
Size of value-matrix[3] = 968 Bytes 
Size of value-matrix[4] = 168 Bytes 
Size of value-matrix[5] = 168 Bytes 
Size of value-matrix[6] = 1904 Bytes 
Size of value-matrix[7] = 1904 Bytes 
Size of value-matrix[8] = 504 Bytes 
Size of value-matrix[9] = 104 Bytes 
Size of value-matrix[10] = 2904 Bytes 
Size of value-matrix[11] = 1728 Bytes 
Size of value-matrix[12] = 1248 Bytes 
Total Size of value-matrices 12736 Bytes 

Weighting     : GainRatio

-test4-Tested:      1 @ Mon May 31 11:03:36 2010
-test4-Tested:      2 @ Mon May 31 11:03:36 2010
-test4-Tested:      3 @ Mon May 31 11:03:36 2010
-test4-Tested:      4 @ Mon May 31 11:03:36 2010
-test4-Tested:      5 @ Mon May 31 11:03:36 2010
-test4-Tested:      6 @ Mon May 31 11:03:36 2010
-test4-Tested:      7 @ Mon May 31 11:03:36 2010
-test4-Tested:      8 @ Mon May 31 11:03:36 2010
-test4-Tested:      9 @ Mon May 31 11:03:36 2010
-test4-Tested:     10 @ Mon May 31 11:03:36 2010
-test4-Tested:    100 @ Mon May 31 11:03:36 2010
-test4-Ready:     950 @ Mon May 31 11:03:36 2010
Seconds taken: 0.0727 (13075.49 p/s)

overall accuracy:        0.964211  (916/950), of which 62 exact matches 
There were 6 ties of which 6 (100.00%) were correctly resolved
-test4-Saving Weights in wg.4.wgt
-test4-Saving Probability Arrays in arr.4.arr
Examine datafile 'dimin.test' gave the following results:
Number of Features: 12
InputFormat       : C4.5


Starting to test, Testfile: dimin.test
Writing output in:          inc5.out
Algorithm     : IB1
Global metric : Value Difference, Prestored matrix
Deviant Feature Metrics:(none)
Size of value-matrix[1] = 168 Bytes 
Size of value-matrix[2] = 968 Bytes 
Size of value-matrix[3] = 968 Bytes 
Size of value-matrix[4] = 168 Bytes 
Size of value-matrix[5] = 168 Bytes 
Size of value-matrix[6] = 1904 Bytes 
Size of value-matrix[7] = 1904 Bytes 
Size of value-matrix[8] = 504 Bytes 
Size of value-matrix[9] = 104 Bytes 
Size of value-matrix[10] = 2904 Bytes 
Size of value-matrix[11] = 1728 Bytes 
Size of value-matrix[12] = 1248 Bytes 
Total Size of value-matrices 12736 Bytes 

Weighting     : GainRatio

-test4-Tested:      1 @ Mon May 31 11:03:36 2010
-test4-Tested:      2 @ Mon May 31 11:03:36 2010
-test4-Tested:      3 @ Mon May 31 11:03:36 2010
-test4-Tested:      4 @ Mon May 31 11:03:36 2010
-test4-Tested:      5 @ Mon May 31 11:03:36 2010
-test4-Tested:      6 @ Mon May 31 11:03:36 2010
-test4-Tested:      7 @ Mon May 31 11:03:36 2010
-test4-Tested:      8 @ Mon May 31 11:03:36 2010
-test4-Tested:      9 @ Mon May 31 11:03:36 2010
-test4-Tested:     10 @ Mon May 31 11:03:36 2010
-test4-Tested:    100 @ Mon May 31 11:03:36 2010
-test4-Ready:     950 @ Mon May 31 11:03:36 2010
Seconds taken: 0.0732 (12975.31 p/s)

overall accuracy:        0.964211  (916/950), of which 62 exact matches 
There were 6 ties of which 6 (100.00%) were correctly resolved
-test4-Saving Weights in wg.5.wgt
-test4-Saving Probability Arrays in arr.5.arr
\end{verbatim}
\end{footnotesize}
\clearpage

\subsection{example 5, {\tt api\_test5.cxx}}

This program demonstrates the use of neighborSets to classify and
store results. It also demonstrates some neighborSet basics.

\begin{footnotesize}
\begin{verbatim}
#include <iostream>
#include <string>
#include "TimblAPI.h"

using std::endl;
using std::cout;
using std::string;
using namespace Timbl;

int main(){
  TimblAPI *My_Experiment = new TimblAPI( "-a IB1 +vDI+DB+n +mM +k4 " , 
                                          "test5" );
  My_Experiment->Learn( "dimin.train" );  
  {
    string line =  "=,=,=,=,+,k,e,=,-,r,@,l,T";
    const neighborSet *neighbours1 = My_Experiment->classifyNS( line );
    if ( neighbours1 ){
      cout << "Classify OK on " << line << endl;
      cout << neighbours1;
    } else
      cout << "Classify failed on " << line << endl;
    neighborSet neighbours2;
    line = "+,zw,A,rt,-,k,O,p,-,n,O,n,E";
    if ( My_Experiment->classifyNS( line, neighbours2 ) ){
      cout << "Classify OK on " << line << endl;
      cout << neighbours2;
    } else
      cout << "Classify failed on " << line << endl;
    line = "+,z,O,n,-,d,A,xs,-,=,A,rm,P";
    const neighborSet *neighbours3 = My_Experiment->classifyNS( line );
    if ( neighbours3 ){
      cout << "Classify OK on " << line << endl;
      cout << neighbours3;
    } else
      cout << "Classify failed on " << line << endl;
    neighborSet uit2;
    {
      neighborSet uit;
      uit.setShowDistance(true);
      uit.setShowDistribution(true);
      cout << " before first merge " << endl;
      cout << uit;
      uit.merge( *neighbours1 );
      cout << " after first merge " << endl;
      cout << uit;
      uit.merge( *neighbours3 );
      cout << " after second merge " << endl;
      cout << uit;
      uit.merge( neighbours2 );
      cout << " after third merge " << endl;
      cout << uit;
      uit.truncate( 3 );
      cout << " after truncate " << endl;
      cout << uit;
      cout << " test assignment" << endl;
      uit2 = *neighbours1;
    }
    cout << "assignment result: " << endl;
    cout << uit2;
    {
      cout << " test copy construction" << endl;
      neighborSet uit(uit2);
      cout << "result: " << endl;
      cout << uit;
    }
    cout << "almost done!" << endl;
  }
  delete My_Experiment;
  cout << "done!" << endl;
}
\end{verbatim}
\end{footnotesize}

Its expected output is (without further comment):

\begin{footnotesize}
\begin{verbatim}
Examine datafile 'dimin.train' gave the following results:
Number of Features: 12
InputFormat       : C4.5

-test5-Phase 1: Reading Datafile: dimin.train
-test5-Start:          0 @ Mon May 31 11:03:36 2010
-test5-Finished:    2999 @ Mon May 31 11:03:36 2010
-test5-Calculating Entropy         Mon May 31 11:03:36 2010
Feature Permutation based on GainRatio/Values :
< 9, 5, 11, 1, 12, 7, 4, 3, 10, 8, 2, 6 >
-test5-Phase 2: Learning from Datafile: dimin.train
-test5-Start:          0 @ Mon May 31 11:03:36 2010
-test5-Finished:    2999 @ Mon May 31 11:03:36 2010

Size of InstanceBase = 19231 Nodes, (769240 bytes), 49.77 % compression
Classify OK on =,=,=,=,+,k,e,=,-,r,@,l,T
# k=1 { T 1.00000 } 0.0000000000000
# k=2 { T 1.00000 } 0.0031862902473388
# k=3 { T 1.00000 } 0.0034182315118303
# k=4 { T 1.00000 } 0.0037433772844615
Classify OK on +,zw,A,rt,-,k,O,p,-,n,O,n,E
# k=1 { E 1.00000 } 0.0000000000000
# k=2 { E 1.00000 } 0.056667880327190
# k=3 { E 1.00000 } 0.062552636617742
# k=4 { E 1.00000 } 0.064423860361889
Classify OK on +,z,O,n,-,d,A,xs,-,=,A,rm,P
# k=1 { P 1.00000 } 0.059729836255170
# k=2 { P 1.00000 } 0.087740769132651
# k=3 { P 1.00000 } 0.088442788919723
# k=4 { P 1.00000 } 0.097058649951429
 before first merge 
 after first merge 
# k=1 { P 1.00000 } 0.059729836255170
# k=2 { P 1.00000 } 0.087740769132651
# k=3 { P 1.00000 } 0.088442788919723
# k=4 { P 1.00000 } 0.097058649951429
 after second merge 
# k=1 { P 2.00000 } 0.059729836255170
# k=2 { P 2.00000 } 0.087740769132651
# k=3 { P 2.00000 } 0.088442788919723
# k=4 { P 2.00000 } 0.097058649951429
 after third merge 
# k=1 { E 1.00000 } 0.0000000000000
# k=2 { E 1.00000 } 0.056667880327190
# k=3 { P 2.00000 } 0.059729836255170
# k=4 { E 1.00000 } 0.062552636617742
# k=5 { E 1.00000 } 0.064423860361889
# k=6 { P 2.00000 } 0.087740769132651
# k=7 { P 2.00000 } 0.088442788919723
# k=8 { P 2.00000 } 0.097058649951429
 after truncate 
# k=1 { E 1.00000 } 0.0000000000000
# k=2 { E 1.00000 } 0.056667880327190
# k=3 { P 2.00000 } 0.059729836255170
 test assignment
assignment result: 
# k=1 { P 1.00000 } 0.059729836255170
# k=2 { P 1.00000 } 0.087740769132651
# k=3 { P 1.00000 } 0.088442788919723
# k=4 { P 1.00000 } 0.097058649951429
 test copy construction
result: 
# k=1 { P 1.00000 } 0.059729836255170
# k=2 { P 1.00000 } 0.087740769132651
# k=3 { P 1.00000 } 0.088442788919723
# k=4 { P 1.00000 } 0.097058649951429
almost done!
done!
\end{verbatim}
\end{footnotesize}
\clearpage

\subsection{example 6, {\tt api\_test6.cxx}}

This program demonstrates the use of ValueDistributions, TargetValues
and neighborSets for classification.

\begin{footnotesize}
\begin{verbatim}
#include <iostream>
#include "TimblAPI.h"

using std::cout;
using std::endl;
using namespace Timbl;

int main(){
  TimblAPI My_Experiment( "-a IB1 +vDI+DB -k3", "test6" );
  My_Experiment.Learn( "dimin.train" ); 
  const ValueDistribution *vd;
  const TargetValue *tv
    = My_Experiment.Classify( "-,=,O,m,+,h,K,=,-,n,I,N,K", vd );
  cout << "resulting target: " << tv << endl;
  cout << "resulting Distribution: " << vd << endl;
  ValueDistribution::dist_iterator it=vd->begin();
  while ( it != vd->end() ){
    cout << it->second << " OR ";
    cout << it->second->Value() << " " << it->second->Weight() << endl;
    ++it;
  }

  cout << "the same with neighborSets" << endl;
  const neighborSet *nb = My_Experiment.classifyNS( "-,=,O,m,+,h,K,=,-,n,I,N,K" );
  ValueDistribution *vd2 = nb->bestDistribution();
  cout << "default answer " << vd2 << endl;
  decayStruct *dc = new  expDecay(0.3);
  delete vd2;
  vd2 = nb->bestDistribution( dc );
  delete dc;
  cout << "with exponenial decay, alpha = 0.3 " << vd2 << endl;  
  delete vd2;
}
\end{verbatim}
\end{footnotesize}

This is the output produced:

\begin{footnotesize}
\begin{verbatim}
Examine datafile 'dimin.train' gave the following results:
Number of Features: 12
InputFormat       : C4.5

-test6-Phase 1: Reading Datafile: dimin.train
-test6-Start:          0 @ Mon May 31 11:03:36 2010
-test6-Finished:    2999 @ Mon May 31 11:03:36 2010
-test6-Calculating Entropy         Mon May 31 11:03:36 2010
Feature Permutation based on GainRatio/Values :
< 9, 5, 11, 1, 12, 7, 4, 3, 10, 8, 2, 6 >
-test6-Phase 2: Learning from Datafile: dimin.train
-test6-Start:          0 @ Mon May 31 11:03:36 2010
-test6-Finished:    2999 @ Mon May 31 11:03:36 2010

Size of InstanceBase = 19231 Nodes, (769240 bytes), 49.77 % compression
resulting target: K
resulting Distribution: { E 1.00000, K 7.00000 }
E 1 OR E 1
K 7 OR K 7
the same with neighborSets
default answer { E 1.00000, K 7.00000 }
with exponenial decay, alpha = 0.3 { E 0.971556, K 6.69810 }
\end{verbatim}
\end{footnotesize}

\end{document}