File: yaws.tex

package info (click to toggle)
yaws 1.65-4etch1
  • links: PTS
  • area: main
  • in suites: etch
  • size: 4,164 kB
  • ctags: 3,907
  • sloc: erlang: 20,138; sh: 3,675; makefile: 556; ansic: 404; lisp: 79
file content (2094 lines) | stat: -rw-r--r-- 68,027 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
\documentclass[11pt,oneside,english]{book}
\newif\ifpdf
\ifx\pdfoutput\undefined
   \pdffalse              % we are not running PDFLaTeX
\else
   \pdfoutput=1           % we are running PDFLaTeX
   \pdftrue
\fi

\ifpdf
  \usepackage[pdftex]{graphicx}
  \pdfcompresslevel=9
  \DeclareGraphicsExtensions{.png,.jpg,.pdf,.mps}
\else
  \usepackage{graphicx}
  \DeclareGraphicsExtensions{.ps,.eps}
\fi
\usepackage[T1]{fontenc}
\usepackage[latin1]{inputenc}
\usepackage{geometry}
\geometry{verbose,letterpaper,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
\usepackage{babel}
\setcounter{secnumdepth}{3}
\setlength\parskip{\medskipamount}
\setlength\parindent{0pt}
\usepackage{url}
\usepackage{pslatex}
\usepackage[colorlinks=false]{hyperref}

\newcommand{\Erlang}            % Write Erlang correctly
        {{\sc Erlang}}


\newcommand{\Yaws}            % Write Yaws correctly
        {{\sc Yaws}}


\makeatletter

\usepackage[T1]{fontenc}
\usepackage{xspace}
\usepackage{html}

\makeatother
\begin{document}



\title{Yaws - Yet Another Web Server}


\author{Claes Wikstrom\\
klacke@hyber.org}





\maketitle
\tableofcontents{}



\chapter{Introduction}


\begin{figure}[h]
\begin{center}

 \includegraphics[scale=0.6] {yaws_head}

\end{center}           
\end{figure}    

\Yaws\  is an \Erlang\  web server. It's written in \Erlang\  and it uses
\Erlang\  as its embedded language similar to PHP in Apache or Java in Tomcat.

The advantages of \Erlang\  as an embedded web page language as opposed to
Java or PHP are many.
\begin{itemize}

\item{Speed - Using \Erlang\  for both implementing the web server itself as well
as embedded script language gives excellent dynamic page generation
performance.}

\item{Beauty - Well this is subjective}

\item{Scalability - due to the lightweight processes of \Erlang{}, \Yaws\ 
is able to handle a very large number of concurrent connections}

\end{itemize}

\Yaws\  has a wide feature set, it supports:

\begin{enumerate}
\item HTTP 1.0 and HTTP 1.1 
\item Static content page delivery
\item Dynamic content generation using embedded \Erlang\  code in the
HTML pages
\item Common Log Format traffic logs
\item Virtual hosting with several servers on the same IP address
\item Multiple servers on multiple IP addresses.
\item HTTP tracing for debugging
\item An interactive interpreter environment in the Web server while
developing and debugging the web site.
\item RAM caching of commonly accessed pages.
\item Full streaming capabilities of both up and down load of dynamically
generated pages.
\item SSL 
\item Support for WWW-Authenticated pages.
\item Support API for cookie based sessions.
\item Application Modules where virtual directory hierarchies can
be made.
\item Embedded mode
\end{enumerate}

\section{Prerequisites}
This document requires that the reader:
\begin{itemize}
\item Is well acquainted with the \Erlang\  programming language
\item Understands basic Web technologies.
\end{itemize}


\section{A tiny example}

We introduce \Yaws\  with the help of a tiny example. 
 The web server \Yaws\  serves  and delivers
static content pages similar to any old web server, except that \Yaws\  does this 
much faster than most web servers. It's the dynamic pages
that make \Yaws\  interesting. Any page with the suffix ``.yaws'' is considered
a dynamic \Yaws\  page. A \Yaws\  page can contain embedded \Erlang\  snippets that
are executed while the page is being delivered to the WWW browser.

Example 1.1 is the HTML code for a small \Yaws\  page.


\begin{figure}[h]
\begin{verbatim}
<html>

<p> First paragraph

<erl>
out(Arg) ->
    {html, "<p>This string gets inserted into HTML document dynamically"}.
</erl>

<p> And here is some more HTML code

</html>
\end{verbatim}
\caption{Example 1.1}
\end{figure}

It illustrates the basic idea behind \Yaws{}. The HTML code
can contain <erl> and </erl> tags and inside these tags an \Erlang\  function
called out/1 gets called and the output of that function is inserted
into the HTML document, dynamically. 

It is possible to have several chunks of HTML code together with several 
chunks of \Erlang\  code in the same \Yaws\  page.

The \verb+Arg+ argument supplied to the automatically invoked \verb+out/1+
function is an \Erlang\  record that contains various data which is interesting 
when generating dynamic pages. For example the HTTP headers which were sent
from the WWW client, the actual TCP/IP socket leading to the WWW client.
This will be elaborated on thoroughly in later chapters. 

The \verb+out/1+ function returned the tuple \verb+{html, String}+ and 
\verb+String+ gets inserted into the HTML output. There are a number
of different return values that can be returned from the \verb+out/1+ function
in order to control the behavior and output from the \Yaws\  web server.



\chapter{Compile, Install, Config and Run}

This chapter is more of a ``Getting started'' guide than a full
description of the \Yaws\  configuration. 
\Yaws\  is hosted on Sourceforge at 
\url{ http://sourceforge.net/projects/erlyaws/ }. This is where the source code
resides in a CVS repository and the latest unreleased version is
available through anonymous CVS through the following commands:

\begin{verbatim}

# export CVS_RSH=ssh 
# export CVSROOT=:pserver:anonymous@cvs.erlyaws.sourceforge.net:/cvsroot/erlyaws
# cvs  login
# cvs -z3 co .

\end{verbatim}


Released versions of \Yaws\  are available either at the Sourceforge site or
at \url{http://yaws.hyber.org/download}. 



\subsection{Compile and Install}

To compile and install a \Yaws\  release
one of the prerequisites is a properly installed \Erlang\  system. \Yaws\ 
runs on \Erlang\  releases OTP R8 and later. Get \Erlang\  from
\url{http://www.erlang.org/}

Compile and install is straight forward:
\begin{verbatim}
# cd /usr/local/src
# tar xfz yaws-X.XX.tar.gz
# cd yaws
# ./configure && make 
# make install
\end{verbatim}

The make command will compile the \Yaws\  web server with the \verb+erlc+
compiler found by the configure script.

make install - will install the executable - called \verb+yaws+ in
/usr/local/bin/ and a working configuration file in \textit{ /etc/yaws.conf}

make local\_install will install the executable in \$HOME/bin and a
working configuration file in \$HOME/yaws.conf

While developing a \Yaws\  site, it's typically most convenient to
use the local\_install and run \Yaws\  as a non privileged user.


\subsection{Configure}
Let's take a look at the config file that gets written to \$HOME after
a local\_install.


\begin{figure}[h]
\begin{verbatim}

# first we have a set of globals

logdir = .
ebin_dir = /home/klacke/yaws/yaws/examples/ebin
include_dir = /home/klacke/yaws/yaws/examples/include

# and then a set of servers

<server localhost>
        port = 8000
        listen = 127.0.0.1
        docroot = /home/klacke/yaws/yaws/scripts/../www
</server>


\end{verbatim}
\caption{Minimal Local Configuration}
\end{figure}

The configuration consists of an initial set of global
variables that are valid for all defined servers.

The only global directive we need to care about for now is the logdir. 
\Yaws\  produces a number of log files and they will -
using the Configuration from Figure 2.1 - end up in the current 
working directory.
We start \Yaws\  interactively as 
\begin{verbatim}
# ~/bin/yaws -i
Erlang (BEAM) emulator version 5.1.2.b2 [source]

Eshell V5.1.2.b2  (abort with ^G)
1> 
=INFO REPORT==== 30-Oct-2002::01:38:22 ===
Using config file /home/klacke/yaws.conf
=INFO REPORT==== 30-Oct-2002::01:38:22 ===
Listening to 127.0.0.1:8000 for servers ["localhost:8000"]

1>
\end{verbatim}

By starting \Yaws\  in interactive mode (using the command switch \textit{-i})
we get a regular \Erlang\  prompt. This is most convenient when developing
\Yaws\ /http pages. For example we:
\begin{itemize}
\item{Can dynamically compile and load optional helper modules we need.}
\item{Get all the crash and error reports written directly to the
terminal.}
\end{itemize}

The configuration in Figure 2.1 defined one HTTP server on
address 127.0.0.1:8000 called ``localhost''.
It is important to understand the difference between the name and
the address of a server. The name is the expected value in the
client Host: header. That is typically the same as the fully qualified
DNS name of the server whereas the address is the actual 
IP address of the server.

Since \Yaws\  supports virtual hosting with several servers on the same
IP address, this matters.

Nevertheless, our server listens to \textit{127.0.0.1:8000} and 
has the name ``localhost'', thus the correct URL for this server
is \textit{http://localhost:8000}.

The document root (docroot) for the server is set to the www directory in the
\Yaws\  source code distribution. This directory contains a bunch of
examples and we should be able to run all those examples now on the
URL  \textit{http://localhost:8000}.

Instead of editing and adding files in the \Yaws\  www directory, we 
create yet another server on the same IP address but a different port
number - and in particular a different document root where we can add
our own files.

\begin{verbatim}
# mkdir ~/test
# mkdir ~/test/logs
\end{verbatim}

Now change the config so it looks like this:

\begin{verbatim}

logdir = /home/klacke/test/logs
ebin_dir = /home/klacke/test
include_dir = /home/klacke/test

<server localhost>
        port = 8000
        listen = 127.0.0.1
        docroot = /home/klacke/yaws/yaws/www
</server>

<server localhost>
        port = 8001
        listen = 127.0.0.1
        docroot = /home/klacke/test
</server>


\end{verbatim}

We define two servers, one being the original default
and a new pointing to a document root in our home directory.

We can now start to add static content in the form of
HTML pages, dynamic content in the form of .yaws pages or
\Erlang\ .beam code that can be used to generate the dynamic content.

The load path will be set so that beam code in the directory \verb+~/test+
will be automatically loaded when referenced.

It is best to run \Yaws\  interactively while developing the site.
In order to start the \Yaws\  as a daemon, we give the flags:
\begin{verbatim}
# yaws -D -heart
\end{verbatim}

The \textit{-D} flag instructs \Yaws\  to run as a daemon and the 
\textit{-heart} flag will start a heartbeat program called heart
which restarts the daemon if it should crash or if it stops responding to
a regular heartbeat.

Once started in daemon mode, we have very limited ways of interacting
with the daemon. It is possible to query the daemon using:
\begin{verbatim}
# yaws -S
\end{verbatim}

This command produces a simple printout of Uptime and number of hits
for each configured server.

If we change the configuration, we can HUP the daemon using the
command:
\begin{verbatim}
# yaws -h
\end{verbatim}

This will force the daemon to reread the configuration file.



\chapter{Static content}

\Yaws\  acts very much like any regular web server while delivering
static pages. By default \Yaws\  will cache static content in RAM.
The caching behavior is controlled by a number of global
configuration directives. Since the RAM caching occupies memory, 
it may be interesting to tweak the default values for the caching directives
or even to turn it off completely.

The following configuration directives control the caching behavior
\begin{itemize}
\item \textit{max\_num\_cached\_files = Integer}
\Yaws\   will  cache  small  files  such  as  commonly
              accessed  GIF images in RAM.  This directive sets a
              maximum number on the number of cached files.   The
              default value is 400.

\item\textit{max\_num\_cached\_bytes = Integer}
 This  directive  controls  the  total amount of RAM
             which can maximally be used for cached  RAM  files.
              The default value is 1000000, 1 megabyte.


\item\textit{max\_size\_cached\_file = Integer}

 This  directive  sets  a  maximum size on the files
              that are RAM cached by \Yaws{}.  The default  value  is
              8000, 8 kilobytes.



\end{itemize}

It may be considered to be confusing, but the numbers specified 
in the above mentioned cache directives are local to each
server. Thus if we have specified \verb+max_num_cached_bytes = 1000000+
and have defined 3 servers, we may actually use $3 * 1000000$ bytes.




\chapter{Dynamic content}

Dynamic content is what \Yaws\  is about. Most web servers are designed
with HTTP and static content in mind whereas \Yaws\  is designed 
for dynamic pages from the start.
Most large sites on the Web today make heavy use of dynamic pages.



\section{Introduction}

When the client GETs a page that has a .yaws suffix, the \Yaws\  server
will read that page from the hard disk and divide it in parts
that consist of HTML code and \Erlang\  code. Each chunk of \Erlang\  code
will be compiled into a module. The chunk of \Erlang\  code must contain
a function \verb+out/1+. If it doesn't the \Yaws\  server will insert a
proper error message into the generated HTML output.

When the \Yaws\  server ships a .yaws page it will process it chunk by chunk
through the .yaws file. If it is HTML code, the server will ship that
as is, whereas if it is \Erlang\  code, the \Yaws\  server will invoke the
\verb+out/1+ function in that code and insert the output of that \verb+out/1+ function into the stream
of HTML that is being shipped to the client.

\Yaws\  will (of course) cache the result of the compilation
and the next time a client requests the same .yaws page \Yaws\  will
be able to invoke the already compiled modules directly.


\section{EHTML}

There are two ways to make the \verb+out/1+ function generate HTML
output. The first and most easy to understand is by returning a tuple
\verb+{html, String}+ where \verb+String+ then is regular HTML data
(possibly as a deep list of strings and/or binaries) which will simply
be inserted into the output stream.
An example:

\begin{verbatim}
<html>
<h1> Example 1 </h1>

<erl>
out(A) ->
    Headers = A#arg.headers,
    {html, io_lib:format("You say that you're running ~p",
                         [Headers#headers.user_agent])}.

</erl>

</html>

\end{verbatim}


The second way to generate output is by returning a tuple
\verb+{ehtml, EHTML}+. The term \verb+EHTML+ must adhere to the 
following structure:

$EHTML = [EHTML] | \{TAG, Attrs, Body\} | 
                   \{TAG, Attrs\} | \{TAG\} |
        binary() | character()$

$TAG         = atom()$

$Attrs = [\{HtmlAttribute, Value\}]$

$HtmlAttribute   = atom()$

$Value = string() | atom()$

$Body  = EHTML$

We give an example to show what we mean:
The tuple 
\begin{verbatim}
{ehtml, {table, [{bgcolor, grey}],
         [
          {tr, [],
           [
            {td, [], "1"},
            {td, [], "2"},
            {td, [], "3"}
           ]},
          {tr, [],
           [{td, [{colspan, "3"}], "444"}]}]}}.
\end{verbatim}

Would be expanded into the following HTML code
\begin{verbatim}
<table bgcolor="grey">
  <tr>
    <td> 1 </td>
    <td> 2 </td>
    <td> 3 </td>
  </tr>
  <tr>
    <td colspan="3"> 444 </td>
  </tr>
</table>

\end{verbatim}

At first glance it may appear as if the HTML code is more
beautiful than the \Erlang\  tuple. That may very well be the
case from a purely aesthetic point of view. However the
\Erlang\  code has the advantage of being perfectly indented by editors
that have syntax support for \Erlang\  (read Emacs). Furthermore, the \Erlang\ 
code is easier to manipulate from an \Erlang\  program.

As an example of some more interesting ehtml we could have
an \verb+out/1+ function that prints some of the HTTP headers.

In the www directory of the \Yaws\  source code distribution we have
a file called \verb+arg.yaws+. The file demonstrates the Arg \#arg record
parameter which is passed to the \verb+out/1+ function.


But before we discuss that code, we describe the Arg record  
in detail.

Here is the \verb+yaws_api.hrl+ file which is included by default
in all \Yaws\ files. The \#arg\{\} record contains many fields that are
useful when processing HTTP requests dynamically.
We have access to basically all the information which is associated with the
client request such as:
\begin{itemize}

\item The actual socket leading back to the HTTP client
\item All the HTTP headers - parsed into a \#headers record.
\item The HTTP request - parsed into a \#http\_request record
\item clidata - Data which is POSTed by the client
\item querydata - This is the remainder of the URL following the first 
occurrence of a ? character - if any.
\item docroot - The absolute path to the docroot of the virtual server
that is processing the request.
\end{itemize}



\begin{verbatim}


-record(arg, {
          clisock,        %% the socket leading to the peer client
          headers,        %% headers
          req,            %% request
          clidata,        %% The client data (as a binary in POST requests)
          server_path,    %% The normalized server path
          querydata,      %% Was the URL on the form of ...?query (GET reqs)
          appmoddata,     %% the remainder of the path leading up to the querey
          docroot,        %% where's the data
          fullpath,       %% full path to yaws file
          cont,           %% Continuation for chunked multipart uploads
          state,          %% State for use by users of the out/1 callback
          pid,            %% pid of the yaws worker process
          opaque,         %% useful to pass static data
          appmod_prepath, %% path in front of: <appmod><appmoddata>
          pathinfo        %% Set to 'd/e' when calling c.yaws for the request
                          %% http://some.host/a/b/c.yaws/d/e
         }).              


-record(http_request, {method,
                       path,
                       version}).

            
-record(headers, {
          connection,
          accept,
          host,
          if_modified_since,
          if_match,
          if_none_match,
          if_range,
          if_unmodified_since,
          range,
          referer,
          user_agent,
          accept_ranges,
          cookie = [],
          keep_alive,
          content_length,
          content_type,
          authorization,
          other = []   %% misc other headers
         }).

\end{verbatim}


There are a number of \textit{advanced} fields in the \#arg record
such as \verb+appmod+, \verb+opaque+ that will be discussed in later chapters.

Now, we show some code which displays the content of the Arg \#arg record.
The code is available in yaws/www/arg.yaws and after a \verb+local_install+
a request to \textit{http://localhost:8000/arg.yaws} will run the code.

\begin{verbatim}

<html>

<h2> The Arg </h2>

<p>This page displays the Arg #argument structure
supplied to the out/1 function.

<erl>


out(A) ->
    Req = A#arg.req,
    H = yaws_api:reformat_header(A#arg.headers),
    {ehtml,
     [{h4,[], "The headers passed to us were:"},
      {hr},
      {ol, [],lists:map(fun(S) -> {li,[], {p,[],S}} end,H)},

      {h4, [], "The request"},
      {ul,[],
       [{li,[], f("method: ~s",  [Req#http_request.method])},
        {li,[], f("path: ~p",    [Req#http_request.path])},
        {li,[], f("version: ~p", [Req#http_request.version])}]},

      {hr},
      {h4, [], "Other items"},
      {ul,[],
       [{li,[], f("clisock from: ~p", [inet:peername(A#arg.clisock)])},
        {li,[], f("docroot: ~s",      [A#arg.docroot])},
        {li,[], f("fullpath: ~s",     [A#arg.fullpath])}]},
      {hr},
      {h4, [], "Parsed query data"},
      {pre,[], f("~p", [yaws_api:parse_query(A)])},
      {hr},
      {h4,[], "Parsed POST data "},
      {pre,[],  f("~p", [yaws_api:parse_post(A)])}]}.

</erl>

</html>

\end{verbatim}


The code utilizes 4 functions from the \verb+yaws_api+ module.
\verb+yaws_api+ is a general purpose www api module that contains various
functions that are handy while developing \Yaws\  code. We will see many 
more of those functions during the examples in the following chapters.

The functions used are:
\begin{itemize}
\item \verb+yaws_api:f/2+ alias for io\_lib:format/2. The \verb+f/2+ function
is automatically \verb+-include+d in all \Yaws\  code.
\item \verb+yaws_api:reformat_header/1+ - This function takes the \#headers record
and unparses it, that is reproduces regular text.
\item \verb+yaws_api:parse_query/1+ - The topic of next section.
\item \verb+yaws_api:parse_post/1+ -- Ditto.
\end{itemize}


\section{POSTs}

\subsection{Queries}

The user can supply data to the server in many ways. The most
common is to give the data in the actual URL.
If we invoke: 

\verb+GET http://localhost:8000/arg.yaws?kalle=duck&goofy=unknown+

we pass two parameters to the \textit{arg.yaws} page.
That data is URL-encoded by the browser and the server can retrieve the
data by looking at the remainder of the URL following the ? character.
If we invoke the \verb+arg.yaws+ page with the above mentioned URL we get
as the result of \verb+yaws_api:parse_query/1+:

$kalle = duck$

$goofy = unknown$

In \Erlang\  terminology, the call \verb+yaws_api:parse_query(Arg)+ returns
the list: 
\begin{verbatim}
[{kalle, "duck"}, {goofy, "unknown"}]
\end{verbatim}

Note that the first element is transformed into an atom, whereas the value
is still a string.

Hence, a web page can contain URLs with a query and thus pass data to the
web server. This scheme works both with GET and POST requests.
It is the easiest way to pass data to the Web server since no FORM is required
in the web page.


\subsection{Forms}

In order to POST data a FORM is required, say that we have a page called
\verb+form.yaws+ that contains the following code:

\begin{verbatim}
<html>
<form action="/post_form.yaws"
      method="post">

<p> A Input field
<input name="xyz" type="text">
<input type="submit">
</form>
</html>
\end{verbatim}
 
This will produce a page with a simple input field and a Submit button.



\begin{figure}[h]
\begin{center}

 \includegraphics[scale=0.6] {a}

\end{center}           
\end{figure}    
 


If we enter something - say ``Hello there'' - 
in the input field and click the Submit button the
client will request the page indicated in the ``action'' attribute, namely
\verb+post_form.yaws+.


If that \Yaws\  page has the following code:
\begin{verbatim}
out(A) ->
   L = yaws_api:parse_post(A),
   {html, f("~p", [L])}.
\end{verbatim}

The user will see the output
\begin{verbatim}
[{xyz, "Hello there"}]
\end{verbatim}

The differences between using the query part of the URL
and a form are the following:
\begin{itemize}
\item Using the query arg only works in GET requests. We parse the
query argument with the function \verb+yaws_api:parse_query(Arg)+

\item If we use a form and POST the user data the client will
transmit the user data in the body of the request.
That is - the client sends a request to get the page using the POST method
and it then attaches the user data - encoded - into the body of the
request.

A POST request can have a query part in its URL as well as user data 
in the body.
\end{itemize}


\section{POSTing files}

It is possible to upload files from the client to the server by
means of POST. We indicate this in the form by telling the browser that we
want a different encoding, here is a form that does this:
\begin{verbatim}

out(A) ->
    Form = 
        {form, [{enctype, "multipart/form-data"},
                {method, post},
                {action, "file_upload_form.yaws"}],
                [{input, [{type, submit}, {value, "Upload"}]},
                 {input, [{type,file}, {width, "50"}, {name, foo}]}]},
    {ehtml, {html,[], [{h2,[], "A simple file upload page"},
                      Form]}}.

\end{verbatim}

The page delivers the entire HTML page with enclosing \verb+html+ markers.
It looks like:


\begin{figure}[h]
\begin{center}

 \includegraphics[scale=0.6] {b} 

\end{center}           
\end{figure}    
 
The user gets an option to browse the local host for a file
or the user can explicitly fill in the file name in the input
field. The file browsing part is automatically taken care of by the
browser.

The action field in the form states that the client shall POST to a page called
\verb+file_upload_form.yaws+. This page will get the contents of the file
in the body of the POST message. Here we have one easy case and one hard
case. \Yaws\  will read the data from the client. However if the file is large
the entire contents of the file will not be part of the read operation.
It is not acceptable to let \Yaws\  continue to read the full POST body
and then when that is done, invoke the POST page. \Yaws\  must
feed the page with the chunks of the file as they arrive.

First the easy case:

Not YET  Written ... .....  fill this in later .....





\chapter{Mode of operation}

\section{On the fly compilation}
When the client requests a \Yaws\  page, \Yaws\  will look in its caches
(there is one cache per virtual server) to see if it finds the
requested page in the cache. If \Yaws\  doesn't find the page in the
cache, it will compile the page. This only happens the first time a
page is requested.
Say that the page is 400 bytes big and has the following layout:



\begin{figure}[h]
\begin{center}

 \includegraphics[scale=0.4] {layout}

\end{center}           
\end{figure}    

The \Yaws\  server will then parse the file and produce a structure
which makes it possible to readily deliver the page the
next time the same page is requested.

When shipping the page it will
\begin{enumerate}
\item Ship the first 100 bytes from the file
\item Evaluate the first \Erlang\  chunk in the file and ship the output
from the \verb+out/1+ function in that chunk. It will also jump ahead
in the file and skip 120 bytes.
\item Ship 80 bytes of HTML code 
\item Again evaluate an \Erlang\  chunk, this time the second and jump
ahead 60 bytes in the file.
\item And finally ship 140 bytes of HTML code to the client
\end{enumerate}

\Yaws\  writes the source output of the compilation into a directory
/tmp/yaws/\$UID. The beam files are never written to a file. 
Sometimes it can be useful to look at the generated source code
files, for example if the \Yaws\ /\Erlang\  code contains a compilation
error which is hard to understand.


\section{Evaluating the \Yaws\  code}

All client requests will execute in their own \Erlang\  process.
For each group of virtual hosts on the same IP:PORT pair
one \Erlang\  process listens for incoming requests. 

This process spawns acceptor processes for each incoming request.
Each acceptor process reads and parses all the HTTP headers from the
client. It then looks at the Host: header to figure out which 
virtual server to use, i.e. which docroot to use for this
particular request. If the Host: header doesn't match 
any server from \textit{yaws.conf} with that IP:PORT pair, the first
one from \textit{yaws.conf} is chosen.


By default \Yaws\  will not ship any data at all to the client
while evaluating a \Yaws\  page. The headers as well as the generated
content are accumulated and not shipped to the client until the 
entire page has been processed.



\chapter{SSL}

SSL (Secure Sockets Layer) is a protocol used on the Web for
delivering encrypted pages to the WWW client. SSL is widely deployed
on the Internet and virtually all bank transactions as well as all
on-line shopping today are done with SSL encryption. There are many
good sources on the net that describe SSL in detail - and I will not
try to do that here. 
There  is for example a good document at:
\url{http://www.tldp.org/HOWTO/SSL-Certificates-HOWTO/} which
describes how to manage certificates and keys.

In order to run an SSL server we must have a certificate. Either we
can create a so called self-signed certificate ourselves or buy a
certificate from one of the many CAs (Certificate Authorities) on the
net. \Yaws\  uses the OTP interface to OpenSSL.

To setup a \Yaws\  server with SSL we could have a \textit{yaws.conf} file that
looks like:

\begin{verbatim}

 logdir = /var/log/yaws

<server www.funky.org>
               port = 443
               listen = 192.168.128.32
               docroot = /var/yaws/www.funky.org
               <ssl>
                  keyfile = /etc/funky.key
                  certfile = /etc/funky.cert
                  password = gazonk
               </ssl>
       </server>
\end{verbatim}

This is the easiest possible SSL configuration. The configuration
refers to a certificate file and a key file. The certificate file
must contain the name ``www.funky.org'' as its ``Common Name''.

The keyfile is the private key file and it is encrypted using
the password ``gazonk''.






\chapter{Applications}

\Yaws\  is well suited for Web applications. In this chapter we will
describe a number of application templates. Code and strategies that
can be used to build Web applications.

There are several ways of starting applications from
\Yaws.

\begin{itemize}
\item The first and easiest variant is to specify
the \verb+-r Module+ flag to the \Yaws\  startup script.
This will \verb+apply(Module,start,[])+.

\item We can also specify runmods in the \textit{yaws.conf} file.
It is possible to have several modules specified if we want
the same \Yaws\  server to run several different applications.

\begin{verbatim}

runmod = myapp
runmod = app_number2

\end{verbatim}

\item It is also possible to do it the other way around and let 
the main application start \Yaws. We call this embedded mode 
and it will be discussed in a later chapter.

\end{itemize}



\section{Login scenarios}

Many Web applications require the user to login. Once the user has
logged in the server sets a Cookie and then the user will be
identified by help of the cookie in subsequent requests. 

  \subsection{The session server}
The cookie is passed in the headers and is available to the \Yaws\ 
programmer in the Arg \#arg record. The \Yaws\  session server
can help us to maintain a state for a user while the user is
logged in to the application. The session server has the following 4
API functions to aid us:

\begin{enumerate}

\item \verb+yaws_api:new_cookie_session(Opaque)+
This function initiates a new cookie based session. The Opaque data
is typically some application specific structure which makes it
possible for the application to read a user state, or it can be the
actual user state itself.

\item \verb+yaws_api:cookieval_to_opaque(Cookie)+
This function maps a cookie to a session.

\item \verb+yaws_api:replace_cookie_session(Cookie, NewOpaque)+
Replace the Opaque user state in the session server.

\item \verb+yaws_api:delete_cookie_session(Cookie)+
This function should typically be called when the user logs out
or when our web application decides to auto logout the user.

\end{enumerate}


All cookie based applications are different but they have 
some things in common. In the example that follows we assume the
existence of a function \verb+myapp:auth(UserName, Passwd)+ and it
returns \verb+ok+ or \verb+{error, Reason}+.


Furthermore - let's have a record:

\begin{verbatim}

%% Per-user state that the login code hands to the session server.
-record(session, {user,
                  passwd,
                  udata = []}).

\end{verbatim}

The following function is a good template function to 
check the cookie.


\begin{verbatim}

%% Look up the value of the cookie named CookieName among the
%% cookie headers of the request described by Arg.
get_cookie_val(CookieName, Arg) ->
    Cookies = (Arg#arg.headers)#headers.cookie,
    yaws_api:find_cookie_val(CookieName, Cookies).



%% Map the cookie named CookieName in request A to its session.
%% Yields {error, "not logged in"} when the cookie value is empty,
%% otherwise whatever yaws_api:cookieval_to_opaque/1 returns.
check_cookie(A, CookieName) ->
    case get_cookie_val(CookieName, A) of
        CookieVal when CookieVal =/= [] ->
            yaws_api:cookieval_to_opaque(CookieVal);
        _Empty ->
            {error, "not logged in"}
    end.

\end{verbatim}


So what we need to do is the following: We want to check all
requests and make sure that the session server has our cookie registered as
an active session.

If a request comes in without a working cookie we want to present
a login page instead of the page the user requested.

Another quirky issue is that the pages necessary for display of the
login page must be shipped without checking the cookie.

  \subsection{Arg rewrite}

In this section we describe a feature whereby the user is allowed to
rewrite the Arg at an early stage in the \Yaws\  server.
We do that by specifying an \verb+arg_rewrite_mod+ in the \textit{yaws.conf} file.
\begin{verbatim}
arg_rewrite_mod = myapp
\end{verbatim}


Then in the \verb+myapp+ module we have:

\begin{verbatim}
%% Entry point of the arg_rewrite_mod: receives the Arg of a request
%% and returns the Arg that the server should continue with.
%% Requests carrying a cookie known to the session server pass
%% through unchanged; all others are handed to do_rewrite/1.
arg_rewrite(Arg) ->
    OurCookieName = "myapp_sid",
    case check_cookie(Arg, OurCookieName) of
        {error, _} ->
            do_rewrite(Arg);
        {ok, _Session} ->
            %% return Arg untouched
            Arg
    end.

%% These pages must be shippable without a good cookie: the login
%% page itself, the image it shows, and the page its form POSTs to.
login_pages() ->
    ["/banner.gif", "/login.yaws", "/login_post.yaws"].

%% Decide what to serve for a request that has no valid session.
%% Pages listed in login_pages/0 are left alone; any other request
%% is pointed at "/login.yaws" and the path originally asked for is
%% kept in the state field of the Arg.
do_rewrite(Arg) ->
    Request = Arg#arg.req,
    {abs_path, Requested} = Request#http_request.path,
    IsLoginPage = lists:member(Requested, login_pages()),
    if
        IsLoginPage ->
            Arg;
        true ->
            LoginReq = Request#http_request{path = {abs_path, "/login.yaws"}},
            Arg#arg{req = LoginReq,
                    state = {abs_path, Requested}}
    end.

\end{verbatim}

Our arg rewrite function lets all Args go through untouched 
that either have a good cookie or belong to a set of predefined
pages that are acceptable to get without being logged in.
If we decide that the user must log in,
 we change the path of the request,
 thereby making the \Yaws\  server ship a login page instead of the page the 
user requested. We also set the original path in the Arg state argument so
that the login page can redirect the user to the original page - once the login procedure is finished.



  \subsection{Authenticating}


Now we're approaching the \verb+login.yaws+ page, the page that displays
the login prompt to the user. The login page consists of two parts,
one part that displays the login data as a form and one form processing page
that reads the data the user entered in the login fields and performs
the actual authentication.

The login page performs a tiny well known Web trick where it
passes the original URL request in a hidden field in the login page and thereby passes that information to the form processing page.

The page \verb+login.yaws+:

\begin{verbatim}
<erl>

%% Render the login form. The form POSTs the user name and password
%% to /login_post.yaws together with a hidden field named url that
%% carries the state field of the Arg.
out(A) ->
    Fields = [{p,[], "Username"}, {input, [{type,text},{name,uname}]},
              {p,[],"Password"},  {input, [{type,password},{name,passwd}]},
              {input, [{type,submit},{value,"Login"}]},
              {input, [{type,hidden},{name,url},
                       {value, A#arg.state}]}],
    Form = {form, [{action,"/login_post.yaws"},
                   {method,post}],
            Fields},
    {ehtml,
     {html,[],
      [{h2, [], "Login page"},
       {hr},
       Form]}}.

</erl>
\end{verbatim}



The form processing page which gets the POST data from the
code above looks like:
\begin{verbatim}


<erl>

-include("myapp.hrl").  
%% we have the session record there
%% we must set the include_path in the yaws.conf file
%% in order for the compiler to find that file

%% Fetch the value stored under key K in the list of {Key, Value}
%% tuples L. Fails with a badmatch if K is not present.
kv(K,L) ->
    {value, {K, Found}} =
        lists:keysearch(K, 1, L),
    Found.
    
%% Process the POST from the login form.
%% Reads the uname, passwd and url fields the form submitted and
%% checks the credentials with myapp:auth/2. On success a session
%% is registered with the session server and the result is a
%% redirect to the posted url together with the session cookie;
%% otherwise a page showing the failure is returned.
out(A) ->
    L = yaws_api:parse_post(A),
    %% the field names must match the names used in the login form
    User = kv(uname, L),
    Pwd =  kv(passwd, L),
    case myapp:auth(User, Pwd) of
        ok ->
            S = #session{user = User,
                         passwd = Pwd,
                         udata = []},
            %% Now register the session to the session server
            Cookie = yaws_api:new_cookie_session(S),
            [{redirect_local, kv(url, L)},
              yaws_api:setcookie("myapp_sid",Cookie)];
        Err ->
            {ehtml, 
             {html, [],
              {p, [], f("Bad login: ~p",[Err])}}}
    end.

</erl>


    
\end{verbatim}

Instead of returning HTML output as in \verb+{html, Str}+ or
\verb+{ehtml,Term}+, the function returns a list of two return
values that we have not discussed previously.
There are many different possible
return values from the \verb+out/1+ function and they will all be
described later. 

\begin{enumerate}

\item The tuple \verb+{redirect_local, Path}+. 
This particular redirect return value will make the 
\Yaws\  web server return a 302 redirect to the specified Path.
Optionally a different status code can be supplied which will
be used in place of 302, e.g.\ \verb+{redirect_local, Path, 307}+. 

\item \verb+yaws_api:setcookie("myapp_sid",Cookie)+ generates
a \verb+Set-Cookie+ header
\end{enumerate}



Now if we put all this together we have a full blown cookie based
login system. The last thing we did in the form processing code was
to register the session with the session server thereby letting any
future requests go straight through the \verb+Arg+ rewriter.

This way both \Yaws\  pages as well as all or some static content
is protected by the cookie login code.


\subsection{Database driven applications}

We can use code similar to the code in the previous section to associate
a user session to entries in a database. Mnesia fits perfectly
together with \Yaws\  and keeping user persistent state in Mnesia is
both easy and convenient.

Once the user has logged in we can typically use the user name
as key into the database. We can mix ram\_tables and disc\_tables
to our liking. The Mnesia database must be initialized by means
of \verb+create_table/2+ before it
can be used. This is typically done while installing the
web application on a machine. 

Another option is to let the application check that Mnesia
is initialized whenever the application starts.

If we don't want or need to use Mnesia, it's of course possible
to use a simple \verb+dets+ file or a text file as well. 

\section{Appmods}

Appmods are a mechanism to invoke different applications
based upon the URL. A URL - as presented to the web server in 
a request - has a path part and a query part.

It is possible to install several appmods in the \textit{yaws.conf}
file as:
\begin{verbatim}

appmods = foo myapp

\end{verbatim}

Now, if the user requests a URL where any component in the
directory path is an appmod, the parsing of the URL will terminate
there and instead of reading the actual file from the disk, \Yaws\  will
invoke the appmod with the remainder of the path inserted into
\verb+Arg#arg.appmoddata+.

Say the user requests the URL \textit{http://www.funky.org/myapp/xx/bar.html}
\Yaws\  will not ship the file \verb+bar.html+ to the client, instead it
will invoke \verb+myapp:out(Arg)+ with \verb+Arg#arg.appmoddata+
set to the string \verb+xx/bar.html+. Any optional query data - that
is data that follows the first "?" character in the URL - 
is removed from the path and passed as \verb+Arg#arg.querydata+.

Appmods can be used to run applications on a server. All requests
to the server that have an appmod in the URL will be handled by that
application. If the application decides that it wants to 
ship a page from the disk to the client, it can return the
tuple \verb+{page, Path}+. This return value will make \Yaws\  read
the page from the disk, possibly add the page to its cache of
commonly accessed pages and ship it back to the client.

The \verb+{page, Path}+ return value is equivalent to a
redirect, but it removes an extra round trip - and is thus faster.

Appmods can also be used to fake entire directory hierarchies
that don't exist on the disk. 


\section{The opaque data}

Sometimes an application needs application specific data
such as the location of its data files or whatever. There exists 
a mechanism to pass application specific configuration data from the
\Yaws\  server to the application. 

When configuring a server we have an opaque field in the
configuration file that can be used for this purpose.
Say that we have the following fields in the 
config file:
\begin{verbatim}

<server foo>
    listen = 192.168.128.44
    <opaque>
        foo = bar
        somefile = /var/myapp/db
        myname = hyber
    </opaque>
</server>
\end{verbatim}

This will create a normal server that listens to the specified IP address.
An application has access to the opaque data that was specified
in that particular server through \verb+Arg#arg.opaque+.

If we have the opaque data specified above, the Arg opaque field will
have the value:

\begin{verbatim}

[{foo, "bar"}, 
 {somefile, "/var/myapp/db"},
 {myname, "hyber"}
]

\end{verbatim}





\section{Customizations}

When actually deploying an application at a live site, some
of the standard \Yaws\  behaviors are not acceptable. Many sites
want to customize the web server behavior when a client requests
a page that doesn't exist on the web server. The standard \Yaws\ 
behavior
is to reply with status code 404 and a message explaining that the
page doesn't exist.

Similarly, when \Yaws\  code crashes, the Reason for the crash is
displayed in the Web browser. This is very convenient while
developing a site but not acceptable in production.


  \subsection{404 File not found}

We can install a special handler for 404 messages. We do that by
specifying an \verb+errormod_404+ in the \textit{yaws.conf} file.

If we have:

\begin{verbatim}
<server foo>
  ..
  ..
  ..
  errormod_404 = myapp

</server>

\end{verbatim}

When \Yaws\  gets a request for a file that doesn't exist
on the hard disk, it invokes the errormod\_404 module
to generate both the status code as well as the content of the
message.

        \verb+Module:out404(Arg, GC, SC)+ will
                   be invoked by \Yaws\ . The arguments are

\begin{itemize}
\item              Arg is a \#arg\{\} record

\item              GC  is  a  \#gconf\{\}   record   (defined   in
              yaws.hrl)

\item              SC   is   a   \#sconf\{\}  record  (defined  in
              yaws.hrl)
\end{itemize}

              The function can and must do the same things
              that a normal \verb+out/1+ does.




  \subsection{Crash messages}

We use a similar technique for generating the crash messages, we
install a module in the \textit{yaws.conf} file and let that module generate
the crash message.
We have:
\begin{verbatim}
errormod_crash = Module
\end{verbatim}

The  default  is  to  display the
entire  formatted  crash   message   in   the
browser.  This is good for debugging but not
in production.

The function \verb+Module:crashmsg(Arg,  SC,  Str)+
will  be  called.  The Str is the real crash
message formatted as a string.



\section{Stream content}

If the \verb+out/1+ function returns the tuple
\verb+{content, MimeType, Content}+ \Yaws\  will
ship that data to the Client. This way we can
deliver dynamically generated content to the client
which is of a different mime type than "text/html".

If the generated file is very large and it is not
possible to generate the entire file in one go, we can
return the value:
\verb+{streamcontent, MimeType, FirstChunk}+ and then
from a different \Erlang\  process deliver the remaining chunks by using
the functions:
\begin{enumerate}
\item \verb+yaws_api:stream_chunk_deliver(YawsPid, Data)+
where the \verb+YawsPid+ is the process id of the \Yaws\  worker
process. That pid is available in \verb+Arg#arg.pid+

\item \verb+yaws_api:stream_chunk_end(YawsPid)+ This function
must be called to indicate the end of the stream.
\end{enumerate}


\section{All out/1 return values}

\begin{itemize}


\item       \verb+{html, DeepList}+
              This  assumes that DeepList is formatted HTML code.
              The code will be inserted in the page.

\item       \verb+{ehtml, Term}+
              This will transform the \Erlang\   term  Term  into  a
              stream of HTML content. 

\item       \verb+{content, MimeType, Content}+
              This return value will make  the  web  server  generate
              different  content  than HTML. It is
              only allowed in a \Yaws\   file  which  has  only  one
              \verb+<erl> </erl>+ part and no html parts at all.

\item       \verb+{streamcontent, MimeType, FirstChunk}+
              This  return  value plays the same role as the
              content return value above.  However it makes it  possible 
              to stream data to the client if the \Yaws\  code
              doesn't have access to all  the  data  in  one  go
              (typically  if  a  file  is  very  large or if data
              arrives from back end servers on the network).

\item       \verb+{header, H}+
              Accumulates a HTTP header. Used by for example  the
              \verb+yaws_api:setcookie/2-6+ function.

\item       \verb+{allheaders, HeaderList}+
              Will  clear  all previously accumulated headers and
              replace them.

\item       \verb+{status, Code}+
              Will set another HTTP status code than 200.

\item       \verb+break+  Will stop processing of any consecutive  chunks  of
              erl or html code in the \Yaws\  file.

\item       \verb+ok+     Do nothing.

\item       \verb+{redirect, Url}+
              Erase  all previous headers and accumulate a single
              Location header. Set the status code.

\item       \verb+{redirect, Url, Status}+
              Same as redirect above with the additional 
	      option of supplying the status code. The default
	      for a redirect is 302 but 301, 303 and 307 are also
	      valid redirect status codes.

\item       \verb+{redirect_local, Path}+
              Does a redirect to the same Scheme://Host:Port/Path
              as we currently are executing in. Path can 
	      either be the path directly (equivalent to 
	      \verb+abs_path+), or one of \verb+{abs_path, Path}+ or 
	      \verb+{rel_path, RelativePath}+.

\item       \verb+{redirect_local, Path, Status}+
              Same as \verb+redirect_local+ above with the additional 
	      option of supplying the status code. The default
	      for a redirect is 302 but 301, 303 and 307 are also
	      valid redirect status codes.

\item       \verb+{get_more, Cont, State}+
              When  we  are  receiving  large POSTs we can return
              this value and be  invoked  again  when  more  Data
              arrives.

\item       \verb+{page, Page}+
              Make Yaws return a different page than the one being
              requested.

\item       \verb+{page, {Options, Page}}+

              Like the above, but supplying an additional deep list of
              options.  For now, the only type of option is
              \verb+{header, H}+ with the effect of accumulating the
              HTTP header \verb+H+ for page \verb+Page+.

\item       \verb+[ListOfValues]+

              It is possible to return a list of the above defined
              return values.  Any occurrence of \verb+streamcontent+,
              \verb+get_more+, or \verb+page+ in this list is legal
              only if it is the last position of the list.

\end{itemize}



\chapter{Debugging and Development}

\Yaws\  has excellent debugging capabilities. First and foremost we
have the ability to run the web server in interactive mode by means of
the command line switch \verb+-i+

This gives us a regular \Erlang\  command line prompt and we can
use that prompt to compile helper code or reload helper
code. Furthermore all error messages are displayed there.
If a .yaws page produces any regular \Erlang\ io, that output will
be displayed at the \Erlang\ prompt - assuming that we are running in interactive mode.

If we give the command line switch \verb+-d+ we get some
additional error messages. Also \Yaws\  does some additional checking
of user supplied data such as headers.

\section{Logs}
\Yaws\  produces various logs. All log files are written into the
\Yaws\  logdir directory. This directory is specified in the config file.

We have the following log files:
\begin{itemize}
\item The access log. Access logging is turned on or off per server
in the \textit{yaws.conf} file. If access\_log is turned on for a server,
\Yaws\  will produce a log in Common Access Log Format called
\textit{HostName:PortNumber.access}

\item \textit{report.log} This file contains all error and crash
messages for all virtual servers in the same file.

\item \textit{trace.traffic} and \textit{trace.http} The two
command line flags \verb+-t+ and \verb+-T+ tell \Yaws\  to trace 
all traffic or just all HTTP messages and write them to a file.
\end{itemize}


\chapter{External scripts via CGI}

Yaws can also interface to external programs generating dynamic
content via the Common Gateway Interface (CGI).  This has to be
explicitly enabled for a virtual host by listing \verb+cgi+ in the
\verb+allowed_scripts+ line in the configuration file.  Any request
for a page ending in \verb+.cgi+ (or \verb+.CGI+) will then result in
trying to execute the corresponding file.

If you have a Php executable compiled for using CGI in the \verb+PATH+
of the Yaws server, you can enable Php support by adding \verb+php+ to
\verb+allowed_scripts+.  Requests for pages ending in \verb+.php+ will
then result in Yaws executing \verb+php+ (configurable via
\verb+php_exe_path+) and passing the name of the corresponding file to
it via the appropriate environment variable.

These ways of calling CGI scripts are also available to \verb+.yaws+
scripts and appmods via the functions \verb+yaws_api:call_cgi/2+ and
\verb+yaws_api:call_cgi/3+.  This makes it possible to write wrappers
for CGI programs, irrespective of the value of \verb+allowed_scripts+.

This is a new feature in Yaws.  It is used by its author for self
written CGI programs as well as for using a standard CGI package, so
it is working.  You should not be surprised however, should some
scripts not work as expected due to an incomplete or incorrect
implementation of certain CGI meta-variables.  The author of this
feature is therefore interested in hearing about your experiences with
it.  He can be contacted as \verb+carsten@codimi.de+.

\chapter{Security}

\Yaws\  is of course susceptible to intrusions. \Yaws\  has the
ability to run under a different user than root - Assuming we need
to listen to privileged port numbers. Running as root is generally a
bad idea.  

Intrusions can happen basically at all places in \Yaws\  code where the
\Yaws\  code calls either the BIF \verb+open_port+ or when \Yaws\  code
does calls to \verb+os:cmd/1+.

Both \verb+open_port+ and \verb+os:cmd/1+ invoke the \verb+/bin/sh+
interpreter to execute its commands. If the commands are nastily
crafted bad things can easily happen.

All data that is passed to these two functions must be carefully
checked.

Since \Yaws\  is written in \Erlang\  a large class of cracks are
eliminated since it is not possible to perform any buffer overrun
cracks on a \Yaws\  server. This is very good.


Another possible point of entry to the system is by providing a URL
which takes the client out from the docroot. This should not be
possible - and the impossibility relies on the correctness of the URL
parsing code in \Yaws.

\section{WWW-Authenticate}
\Yaws\  has support for WWW-Authentication.   WWW-Authenticate is a 
standard HTTP scheme for the basic protection of files with a username
and password.  When a client browser wants a protected file, it must send an
``Authenticate: username:password'' header in the request.  Note that
this is plain text.   If there is no such header or the username and
password are invalid the server will respond with status code 401 and
the realm.  Browsers will then tell the user that a username and
password is needed for ``realm'',  and will resend the request after
the user enters the information.

WWW-Authentication is configured in the \textit{yaws.conf} file, in as
many \verb+<auth>+ directives as you desire:

\begin{verbatim}
<server foo>
  docroot = /var/yaws/www/

..
..

  <auth>
    realm = secretpage
    dir   = /protected
    dir   = /anotherdir
    user  = klacke:gazonk
    user  = jonny:xyz
    user  = ronny:12r8uyp09jksfdge4
  </auth>
</server>
\end{verbatim}


\Yaws\  will require one of the given username:password pairs for all
files in the \textit{/protected} and \textit{/anotherdir} directories.
Note that these directories are specified as a server path,  that is,
the filesystem path that is actually protected here is
\textit{/var/yaws/www/protected}


\chapter {Embedded mode}

\Yaws\  is a normal OTP application. It is possible to integrate \Yaws\ 
into another - larger - application. The \Yaws\  source tree must be
integrated into the larger application's build environment. \Yaws\  is
then simply started by \verb+application:start()+ from the larger
application's boot script.

By default \Yaws\  reads its configuration data from a config file, the
default is ``/etc/yaws.conf''. If \Yaws\  is integrated into a larger
application that application typically has its configuration data kept
at some other centralized place. Sometimes we may not even have a file
system to read the configuration from if we run a small embedded
system.

\Yaws\  reads its application environment. If the environment key
\verb+embedded+ is set to \verb+true+, \Yaws\  starts in embedded mode.
Once started it must be fed a configuration, and that can be done
after \Yaws\  has started by means of the function
\verb+yaws_api:setconf/2+.

It is possible to call \verb+setconf/2+ several times to force \Yaws\  to
reread the configuration.




\chapter{The config file - yaws.conf}

In this section we provide a complete listing of all possible
configuration file options.
The
       configuration contains two distinct parts: a  global  part
       which  affects  all  the  virtual  hosts and a server part
       where options for each virtual host are supplied.

\section{Global Part}

\begin{itemize}


\item       \verb+logdir = Directory+ -
              All \Yaws\  logs will be  written  to  files  in  this
              directory.  There  are  several different log files
              written by \Yaws\ .

              \begin{itemize}
              \item report.log - this is a text file that contains  all
              error logger printouts from \Yaws\ .
              \item Host.access - for each virtual host served by \Yaws\ ,
              a file Host.access will be written  which  contains
              an access log in Common Log Format.
              \item trace.http  -  this file contains the HTTP trace if
              that is enabled
              \item trace.traffic -  this  file  contains  the  traffic
              trace if that is enabled
              \end{itemize}

\item        \verb+ebin_dir = Directory+ -
              This  directive adds Directory to the \Erlang\  search
              path. It is possible to have several of these  commands 
              in the configuration file.

\item        \verb+include_dir = Directory+ -
              This directive adds Directory to the path of directories  
               where  the  \Erlang\   compiler  searches  for
              include  files.  We  need to use this if we want to
              include .hrl files in our \Yaws\  \Erlang\  code.

\item        \verb+max_num_cached_files = Integer+ -
              \Yaws\   will  cache  small  files  such  as  commonly
              accessed  GIF images in RAM.  This directive sets a
              maximum number on the number of cached files.   The
              default value is 400.

\item        \verb+max_num_cached_bytes = Integer+ -
              This  directive  controls  the  total amount of RAM
              which can maximally be used for cached  RAM  files.
              The default value is 1000000, 1 megabyte.

\item        \verb+max_size_cached_file = Integer+ -
              This  directive  sets  a  maximum size on the files
              that are RAM cached by \Yaws\ .  The default  value  is
              8000, 8 kBytes.

\item        \verb+cache_refresh_secs = Integer+
              The  RAM  cache  is used to serve pages that sit in
              the  cache.  An  entry  sits  in  cache   at   most
              cache\_refresh\_secs  number  of seconds. The default
              is 30. This means that when the content is  updated
              under  the  docroot, that change doesn't show until
              30 seconds have passed.  While  developing  a  \Yaws\ 
              site,  it may be convenient to set this value to 0.
              If the debug flag (-d) is passed to the \Yaws\   start
              script, this value is automatically set to 0.

\item        \verb+trace  = traffic | http+ -
              This  enables  traffic  or http tracing. Tracing is
              also possible to enable with a command line flag to
              \Yaws\ .

\item        \verb+username  = User+ -
             When Yaws is run as root, it can be configured to 
             change userid once it has created the necessary
             listen sockets on privileged ports.


\end{itemize}



\section{Server Part}

\Yaws\   can  virthost  several  web servers  on  the  same IP
       address as well as  several  web servers  on  different  IP
       addresses.  The  one  limitation  here is that there can be
       only one server with ssl enabled per  each  individual  IP
       address.   Each virtual host is defined within a matching
       pair of \verb+<server ServerName>+ and \verb+</server>+. 
       The  ServerName
       will be the name of the web server.

       The following directives are allowed inside a server definition.

\begin{itemize}




\item              \verb+port = Port+ -
              This makes the server listen on Port

\item        \verb+listen = IpAddress+ -
              This makes the  server  listen  on  IpAddress.  When
              virthosting  several  servers  on  the same IP/port
              address, if the browser doesn't send a Host: field,
              \Yaws\   will  pick  the first server specified in the
              config file.

\item       \verb+rport = Port+
              This forces  all  local  redirects  issued  by  the
              server  to  go  to  Port.  This is useful when \Yaws\ 
              listens to a port which is different from the  port
              that  the  user  connects  to. For example, running
              \Yaws\  as a non-privileged user makes  it  impossible
              to  listen  to port 80, since that port can only be
              opened by a privileged user. Instead  \Yaws\   listens
              to  a high port number port, 8000, and iptables are
              used to redirect traffic to port 80  to  port  8000
              (most NAT:ing firewalls will also do this for you).

\item       \verb+rscheme = http | https+
              This forces  all  local  redirects  issued  by  the
              server  to  use this method. This is useful when an
              SSL off-loader, or stunnel, is  used  in  front  of
              \Yaws\ .

\item       \verb+access_log = true | false+
              Setting  this  directive  to  false turns off
              traffic logging for this virtual server. The
              default value is true.

\item               \verb+docroot  =  Directory+ - This makes the server
              serve all its content from Directory

\item       \verb+partial_post_size = Integer+ -
              When a \Yaws\  file receives large  POSTs,  the
              amount  of  data  received  in each chunk is
              determined by this parameter.  The default 
              value is 10240.

\item       \verb+tilde_expand = true|false+ -
              If  this  value  is  set  to false \Yaws\  will
              never do tilde  expansion.  The  default  is
              false.   tilde\_expansion   is  the  mechanism
              whereby    a     URL     on     the     form
              \verb+http://www.foo.com/~username+ is changed into
              a request where the docroot for that particular  
           request is set to the directory 
              \verb+~username/public_html/+. The default value is false.

\item       \verb+allowed_scripts = [ListOfSuffixes]+ -
              The allowed script types for this server.  Recognized
              are `yaws', `cgi', `php'.  Default is 
              \verb+allowed_scripts = yaws+.

\item       \verb+php_exe_path = Path+ -
              The name of (and possibly path to) the php executable
	      used to interpret php scripts (if allowed).  Default is
	      \verb+php_exe_path = php+.

\item       \verb+appmods = [ListOfModuleNames]+ -
              If any of the names in ListOfModuleNames appear
              as components in the path for a request, the
              path request parsing will terminate and that
              module will be called.

              Assume for example  that  we  have  the  URL
              http://www.hyber.org/myapp/foo/bar/baz?user=joe
              while we have the module foo defined  as  an
              appmod,  the  function  foo:out(Arg) will be
              invoked instead of searching the file systems
              below the point foo.

              The  Arg argument will have the missing path
              part supplied in its appmoddata field.

\item     \verb+errormod_404 = Module+ -
              It is possible to set a special module  that
              handles 404 Not Found messages.

              The function Module:out404(Arg, GC, SC) will
              be invoked. The arguments are

              Arg is an \verb+#arg{}+ record

              GC  is  a  \verb+#gconf{}+   record   (defined   in
              yaws.hrl)

              SC   is   a   \verb+#sconf{}+  record  (defined  in
              yaws.hrl)

              The function can and must do the same things
              that a normal \verb+out/1+ does.

\item       \verb+errormod_crash = Module+ -
              It  is possible to set a special module that
              handles the HTML generation of server  crash
              messages.  The  default  is  to  display the
              entire  formatted  crash   message   in   the
              browser.  This is good for debugging but not
              in production.

              The function Module:crashmsg(Arg,  SC,  Str)
              will  be  called.  The Str is the real crash
              message formatted as a string.

\item       \verb+arg_rewrite_mod = Module+ -
              It is possible  to  install  a  module  that
              rewrites  all  the  Arg \verb+#arg{}+ records at an
              early stage in the \Yaws\  server.  This can be
              used to do various things such as checking a
              cookie, rewriting paths etc.


\item        \verb+<ssl>  .... </ssl>+
              This begins and ends  an  SSL  configuration
              for this server.
\begin{itemize}
\item        \verb+keyfile = File+ -
              Specifies  which  file  contains the private
              key for the certificate.

\item        \verb+certfile = File+ -
              Specifies which file contains  the  certificate for the server.

\item        \verb+cacertfile = File+
              If  the  server  is  set  up  to require
              client certificates, this file needs to contain  
              all the certificates of the acceptable
              signers for the client certs.

\item        \verb+verify = 1 | 2 | 3+
              Specifies  the  level  of  verification  the
              server  does  on client certs. 1 means nothing,
              2 means that the  server  will  ask  the
              client for a cert but not fail if the client
              doesn't supply a client cert, 3  means  that
              the  server  requires the client to supply a
              client cert.

\item        \verb+depth = Int+
              Specifies the depth  of  certificate  chains
              the  server is prepared to follow when verifying 
              client certs.

\item        \verb+password = String+ - 
              If the private key  is  encrypted  on
              disk,  this  password  is  the  3des  key to
              decrypt it.

\item        \verb+ciphers = String+
              This  string  specifies  the  ssl  cipher
              string.  The syntax of the ssl cipher string
              is a little horrible sub language of its own.
              It  is  documented  in  the ssl man page for
              ``ciphers''.

\item        \verb+</ssl>+
              Ends an SSL definition
\end{itemize}


\item       \verb+<auth> ... </auth>+
              Defines an  auth  structure.  The  following
              items  are allowed within a matching pair of
              \verb+<auth>+ and \verb+</auth>+ delimiters.

\begin{itemize}

\item       \verb+dir = Dir+
              Makes Dir to be controlled by  WWW-Authenticate  
              headers.  In  order for a user to have
              access to a WWW-Authenticate controlled  directory, 
              the user must supply a password.

\item       \verb+realm = Realm+
              In  the  directory  defined  here,  the WWW-Authenticate 
              Realm is set to this value.

\item       \verb+user = User:Password+
              Inside this directory,  the  user  User  has
              access  if  the  user  supplies the password
              Password in the pop up dialog presented  by
              the  browser.  We can obviously have several
              of  these  values  inside  a  single   \verb+<auth>+
              \verb+</auth>+ pair.

\item       \verb+</auth>+
              Ends an auth definition


\end{itemize}

\end{itemize}







\section{Configuration Examples}



       The  following  example  defines a single server on
       port 80.

\begin{verbatim}
       logdir = /var/log/yaws
       <server www.mydomain.org>
               port = 80
               listen = 192.168.128.31
               docroot = /var/yaws/www
       </server>

\end{verbatim}
       And this example shows a similar setup but two web
       servers on the same IP address

\begin{verbatim}
       logdir = /var/log/yaws
       <server www.mydomain.org>
               port = 80
               listen = 192.168.128.31
               docroot = /var/yaws/www
       </server>

       <server www.funky.org>
               port = 80
               listen = 192.168.128.31
               docroot = /var/yaws/www_funky_org
       </server>

\end{verbatim}



When there are several
virtual hosts defined for the same IP number and port,
and an HTTP request arrives with a Host field that does
not match any defined virtual host, then the one which is
defined ``first'' in the file is chosen.


An example with www-authenticate and no access logging at all.

\begin{verbatim}
       logdir = /var/log/yaws
       <server www.mydomain.org>
               port = 80
               listen = 192.168.128.31
               docroot = /var/yaws/www
               access_log = false
               <auth>
                   dir = /var/yaws/www/secret
                   realm = foobar
                   user = jonny:verysecretpwd
                   user = benny:thequestion
                   user = ronny:havinganamethatendswithy
              </auth>

       </server>
\end{verbatim}

       And  finally  a  slightly more complex example with
       two servers on the same IP, and one ssl server on a
       different IP.

       The \verb+is_default+ is used to select the funky server if
       someone types in for example http://192.168.128.31/ in his/her
       browser.
              

\begin{verbatim}
       logdir = /var/log/yaws
       max_num_cached_files = 8000
       max_num_cached_bytes = 6000000

       <server www.mydomain.org>
               port = 80
               listen = 192.168.128.31
               docroot = /var/yaws/www
       </server>

       <server www.funky.org>
               port = 80
               is_default = true
               listen = 192.168.128.31
               docroot = /var/yaws/www_funky_org
       </server>

       <server www.funky.org>
               port = 443
               listen = 192.168.128.32
               docroot = /var/yaws/www_funky_org
               <ssl>
                  keyfile = /etc/funky.key
                  certfile = /etc/funky.cert
                  password = gazonk
               </ssl>
       </server>
\end{verbatim}


\end{document}