File: user-guide.stex

package info (click to toggle)
nanopass-framework-scheme 1.9%2Bgit20160429.g1f7e80b-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,056 kB
  • sloc: makefile: 17
file content (2752 lines) | stat: -rw-r--r-- 108,787 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
\documentclass[letterpaper,10pt]{book}
\usepackage{fullpage}
\usepackage{scheme}
\usepackage[pdftitle="Nanopass Framework Users Guide",
            pdfauthor="Andrew W. Keep",
            pdfdisplaydoctitle]{hyperref}

\title{Nanopass Framework Users Guide\thanks{This documentation is largely
extracted from Chapter 2 of my dissertation~\cite{keep-phdthesis-2013}.
The user guide has been updated to reflect recent updates the nanopass
framework.
Several example passes and languages have also been replaced with a more
recent, publicly available example compiler.}}
\author{Andrew W. Keep}

\def\TODO#1{{\textcolor{red}{#1}}}
\newcommand{\dash}[1][1em]{\raise.5ex\hbox to #1{\leaders\hrule\hfil}}
\mathchardef\mhyphen="2D
\parskip 6pt
\parindent 0pt
\begin{document}
\maketitle

\chapter{Introduction} % 2.1
The nanopass framework is an embedded DSL for writing compilers.
The framework provides two main syntactic forms: \scheme{define-language} and
\scheme{define-pass}.
The \scheme{define-language} form specifies the grammar of an intermediate
language.
The \scheme{define-pass} form specifies a pass that operates over an input
language and produces another, possibly different, output language.

\section{A Little Nanopass Framework History}
The idea of writing a compiler as a series of small, single-purpose passes
grew out of a course on compiler construction taught by Dan
Friedman in 1999 at Indiana University.
The following year, R. Kent Dybvig and Oscar Waddell joined Friedman
to refine the idea of the {\it micropass compiler} into a set of assignments
that could be used in a single semester to construct a compiler for a subset of
Scheme.
The micropass compiler uses an S-expression pattern matcher
developed by Friedman to simplify the matching and rebuilding of language terms.
Erik Hilsdale added a support for
catamorphisms~\cite{Meijer:1991:FPB:645420.652535} that provides a more
succinct syntax for recurring
into sub-terms of the language, which further simplified pass development.

Passes in a micropass compiler are easy to understand, as each pass is
responsible for just one transformation.
The compiler is easier to debug when compared with a traditional compiler
composed of a few, multi-task passes.
The output from each pass can be inspected to ensure that it meets grammatical and
extra-grammatical constraints.
The output from each pass can also be tested in the host Scheme system to ensure
that the output of each pass evaluates to the value of the initial expression.
This makes it easier to isolate broken passes and identify bugs.
The compiler is more flexible than a compiler composed of a few, multi-task passes.
New passes can easily be added between existing passes, which allows
experimentation with new optimizations.
In an academic setting, writing compilers composed of many, single-task passes
is useful for assigning extra compiler passes to
advanced students who take the course.

Micropass compilers are not without drawbacks.
First, efficiency can be a problem due to pattern-matching overhead and the
need to rebuild large S-expressions.
Second, passes often contain boilerplate code to recur through otherwise
unchanging language forms.
For instance, in a pass to remove one-armed \scheme{if} expressions, where only
the \scheme{if} form changes, other forms in the language must be
handled explicitly to locate embedded \scheme{if} expressions.
Third, the representation lacks formal structure.
The grammar of each intermediate language can be documented in comments, but
the structure is not enforced.

The \scheme{define-language} and \scheme{define-pass} syntactic forms are used
by the nanopass framework to address these problems.
A \scheme{define-language} form formally specifies the grammar of an
intermediate language.
A \scheme{define-pass} form defines a pass that operates on one language and
produces output in a possibly different language.
Formally specifying the grammar of an intermediate language and writing passes
based on these intermediate languages
allows the nanopass framework to use a record-based
representation of language terms that is more efficient than the S-expression
representation, autogenerate boilerplate code to recur
through otherwise unchanging language forms, and generate checks to verify that
the output of each pass adheres to the output-language grammar.

The summer after Dybvig, Waddell, and Friedman taught their course, Jordan
Johnson implemented an initial prototype of the nanopass framework to support
the construction of micropass compilers.
In 2004, Dipanwita Sarkar, Oscar Waddell, and R. Kent Dybvig developed a
more complete prototype nanopass framework for compiler construction and
submitted a paper on it to ICFP~\cite{Sarkar:2004:NIC:1016850.1016878}.
The initial paper focused on the nanopass framework as a tool capable of
developing both academic and commercial quality compilers.
The paper was accepted but on the condition that it be refocused only on academic
uses.
The reviewers were not convinced that the framework or nanopass construction method
was capable of supporting a commercial compiler.
In retrospect, the reviewers were right.
Sarkar implemented only a few of the passes from the compiler used in the
course on compilers.
This implementation showed that the nanopass framework was viable, but it did
not support the claim
that the nanopass framework could be used for a commercial compiler.
In fact, because the class compiler was started but never completed, it is
unclear whether the prototype was even up to the task of writing the full class
compiler.

The nanopass framework described in this guide improves on the prototype
developed by Sarkar.
In this framework, language definitions are no longer restricted to
top-level definitions.
Additionally, passes can accept more than one argument and return zero or
more values.
Passes can be defined that operate on a subset of a language instead of being
restricted to starting from the entry-point nonterminal of the language.
Passes can also autogenerate nonterminal transformers not supplied by the
compiler writer.
The new nanopass framework also defines two new syntactic forms,
\scheme{nanopass-case} and \scheme{with-output-language}, that allow language
terms to be matched and constructed outside the context of a pass.

\section{The Nanopass Framework Today}
% TODO: Update this line count to reflect the current size of
%       the nanopass framework
Although the nanopass framework defines just two primary syntactic forms, the
macros that implement them are complex, with approximately 4600 lines of code.
In both the prototype and the new version of the nanopass framework, the
\scheme{define-language} macro parses a language definition and stores a
representation of it in the compile-time environment.
This representation can be used to guide the definition of derived languages
and the construction of passes.
Both also create a set of record types used to represent language terms at run
time, along with an unparser for translating the record representation to an
S-expression representation.
Finally, both create meta-parsers to parse S-expression patterns and templates.
An S-expression to record-form parser can also be created from the language
using \scheme{define-parser}.\footnote{In the prototype, this was part of
the functionality of \scheme{define-language}, but in a commercial compiler
we do not frequently need an S-expression parser, so we no longer
autogenerate one.}

The \scheme{define-pass} form, in both versions of the framework, operates
over an input-language term and produces an output-language term.
The input-language meta-parser generates code to match the specified pattern as
records, as well as a set of bindings for the variables named in the pattern.
The output-language meta-parser generates record constructors and
grammar-checking code.
Within a pass definition, a transformer is used to define a translation from an
input nonterminal to an output nonterminal.
Each transformer has a set of clauses that match an input-language term and
construct an output-language term.
The pattern matching also supports
catamorphisms~\cite{Meijer:1991:FPB:645420.652535} for recurring into language
sub-terms.

\section{Examples using the Nanopass Framework}
There are two, publicly available examples of the nanopass framework.
The first is in the {\tt tests} sub-directory of the nanopass framework git
repository at
\href{https://github.com/akeep/nanopass-framework/}{github.com/akeep/nanopass-framework}.
This is part of a student compiler, originally included with the prototype
nanopass framework developed by Sarkar et al.\ and updated to conform with the
changes that have been made in the updated nanopass framework.

The second example is available in the
\href{https://github.com/akeep/scheme-to-c/}{github.com/akeep/scheme-to-c}
repository. 
This compiler is better documented and provides a complete compiler
example targeting fairly low-level C from a simplified Scheme dialect.
It was developed to be presented at
\href{https://clojure-conj.org}{Clojure Conj 2013}, just
days before the Conj started, and compiles a small subset of Scheme to C.
It is similar to the included example, but has the advantage of being a
complete end-to-end compiler that can be run from a Scheme REPL.
It uses {\tt gcc}, targeting a 64-bit platform as the back-end, but I hope can
be modified to target other platforms without too much trouble, or even moved
off of C to target JavaScript, LLVM, or other back ends.

\section{Other Uses of the Nanopass Frameowrk}
The nanopass framework was used to replace the original Chez Scheme
compiler~\cite{dybvig:csug8} with a nanopass version of the compiler.
The nanopass version has officially been released as Chez Scheme version 9.0.
Chez Scheme is a closed-source commercial compiler.

The nanopass framework is also being used as part of the
\href{https://github.com/eholk/harlan}{Harlan} compiler.
Harlan is a general purpose language for developing programs for running on
the GPU.
Harlan uses an S-expression format that is compiled into C++ using OpenCL to
run computational kernels on the GPU.
The source code for Harlan is publicly available at
\href{https://github.com/eholk/harlan}{github.com/eholk/harlan}.

\chapter{Defining Languages and Passes} % old 2.4, new 2.3

The nanopass framework builds on the prototype, originally developed by
Sarkar et al.
The examples in this section are pulled from the Scheme to C compiler available
at \href{https://github.com/akeep/scheme-to-c}{github.com/akeep/scheme-to-c}.

\section{Defining languages}

The nanopass framework operates over a set of compiler-writer-defined
languages.
Languages defined in this way are similar to context-free grammars, in that
they are composed of a set of terminals, a set of nonterminal symbols, a set of
productions for each nonterminal, and a start symbol from the set of
nonterminal symbols.
We refer to the start symbol as the entry nonterminal of the language.
An intermediate language definition for a simple variant of the Scheme
programming language, post macro expansion, might look like:

{\small
\schemedisplay
(define-language Lsrc
  (terminals
    (symbol (x))
    (primitive (pr))
    (constant (c))
    (datum (d)))
  (Expr (e body)
    pr
    x
    c
    (quote d)
    (if e0 e1)
    (if e0 e1 e2)
    (or e* ...)
    (and e* ...)
    (not e)
    (begin e* ... e)
    (lambda (x* ...) body* ... body)
    (let ([x* e*] ...) body* ... body)
    (letrec ([x* e*] ...) body* ... body)
    (set! x e)
    (e e* ...)))
\endschemedisplay
}

\noindent

The \scheme{Lsrc} language defines a subset of Scheme suitable for our
example compiler. 
It is the output language of a more general ``parser'' that
parses S-expressions into \scheme{Lsrc} language forms.
The \scheme{Lsrc} language consists of a set of terminals (listed in the
\scheme{terminals} form) and a single nonterminal \scheme{Expr}.
The terminals of the language are
\begin{itemize}
  \item \scheme{symbol} (for variables),
  \item \scheme{primitive} (for the subset of Scheme primitives support
    by this language),
  \item \scheme{constant} (for the subset of Scheme constants, and
  \item \scheme{datum} (for the subset of Scheme datum supported by this language).
\end{itemize}
The compiler writer must supply a predicate corresponding to each terminal,
lexically visible where the language is defined.
The nanopass framework derives the predicate name from the terminal name by
adding a \scheme{?} to the terminal name.
In this case, the nanopass framework expects \scheme{symbol?},
\scheme{primitive?}, \scheme{constant?}, and \scheme{datum?} to be
lexically visible where \scheme{Lsrc} is defined.

Each terminal clause lists one or more meta-variables, used to refer to the
terminal in nonterminal productions. 
Here, \scheme{x} refers to a \scheme{symbol}, \scheme{pr} refers to
a \scheme{primitive}, \scheme{c} refers to a \scheme{constant},
and \scheme{d} refers to a \scheme{datum}.

For our example compiler, the host Scheme system's \scheme{symbol?} is used
to determine when an item is a variable.

The example compiler also selects a subset of primitives from Scheme and
represents these primitives as symbols.
A \scheme{primitive?} predicate like the following can be used to specify
this terminal.\footnote{In the example compiler, the primitives are specified
in separate association lists to capture the arity of each primitive and the
place in the compiler is handled as it goes through the compiler process.
This complexity has been eliminated for the dicussion here.
Please reference the source code for a more complete discussion of
primitive handling in the example compiler.}

{\small
\schemedisplay
(define primitive?
  (lambda (x)
    (memq x 
      '(cons make-vector box car cdr vector-ref vector-length unbox
        + - * / pair? null? boolean? vector? box? = < <= > >= eq?
        vector-set! set-box!))))
\endschemedisplay
}

\noindent
Our example compiler also limits the constants that can be expressed to a subset of those allowed by Scheme.
The \scheme{constant?} predicate limits these to booleans (\scheme{#t} and
\scheme{#f}), null (\scheme{()}), and appropriately sized integers
(between $-2^{60}$ and $2^{60} - 1$).

{\small
\schemedisplay
(define target-fixnum?
  (lambda (x)
    (and (and (integer? x) (exact? x))
         (<= (- (expt 2 60)) x (- (expt 2 60) 1)))))

(define constant?
  (lambda (x)
    (or (target-fixnum? x) (boolean? x) (null? x))))
\endschemedisplay
}

\noindent
The example compiler limits the Scheme datum that can be represented to
constants, pairs, vectors, and boxes.
The \scheme{datum?} predicate can be defined as follows:

{\small
\schemedisplay
(define datum?
  (lambda (x)
    (or (constant? x)
        (and (box? x) (datum? (unbox x)))
        (and (pair? x) (datum? (car x)) (datum? (cdr x)))
        (and (vector? x)
             (let loop ([i (vector-length x)])
               (or (fx=? i 0)
                   (let ([i (fx- i 1)])
                     (and (datum? (vector-ref x i))
                          (loop i)))))))))

\endschemedisplay
}

\noindent
The \scheme{Lsrc} language also defines the nonterminal \scheme{Expr}.
Nonterminals start with a name, followed by a list of meta-variables and a set
of grammar productions.
In this case, the name is \scheme{Expr}, and two meta-variables, \scheme{e} and
\scheme{body}, are specified. 
Just like the meta-variables named in the terminals clause, nonterminal
meta-variables are used to represent the nonterminal in nonterminal
productions.
Each production follows one of three forms.
It is a single meta-variable, an S-expression that starts with a
keyword, or an S-expression that does not start with a keyword (referred to as an
\emph{implicit} production).
The S-expression forms cannot include keywords past the initial starting
keyword.
In \scheme{Lsrc}, the \scheme{x}, \scheme{c}, and \scheme{pr} productions are
the single meta-variable productions and indicate that a stand-alone
\scheme{symbol}, \scheme{constant}, or \scheme{primitive} are valid
\scheme{Expr}s.
The only implicit S-expression production is the \scheme{(e e* ...)}
production, and it indicates a call that takes zero or more
\scheme{Expr}s as arguments.
(The \scheme{*} suffix on \scheme{e} is used by convention to indicate
plurality and does not have any semantic meaning: It is the \scheme{...} that
indicates that the field can take zero or more \scheme{Expr}s.)
The rest of the productions are S-expression productions with keywords that
correspond to the Scheme syntax that they represent.

In addition to the star, \scheme{*}, suffix mentioned earlier in the call
productions, meta-variable references can also use a
numeric suffix (as in the productions for \scheme{if}), a question mark (\scheme{?}), or a caret (\scheme{^}).
The \scheme{?} suffix is intended for use with \scheme{maybe} meta-variables,
and the \scheme{^} is used when expressing meta-variables with a more
mathematical syntax than the numeric suffixes provide.
Suffixes can also be used in combination.
References to meta-variables in a production must be unique, and the suffixes
allow the same root name to be used more than once.

Language definitions can also include more than one nonterminal, as the
following language illustrates:

{\small
\schemedisplay
(define-language L8
  (terminals
    (symbol (x a))
    (constant (c))
    (void+primitive (pr)))
  (entry Expr)
  (Expr (e body)
    x
    le
    (quote c)
    (if e0 e1 e2)
    (begin e* ... e)
    (set! x e)
    (let ([x* e*] ...) abody)
    (letrec ([x* le*] ...) body)
    (primcall pr e* ...)
    (e e* ...))
  (AssignedBody (abody)
    (assigned (a* ...) body) => body)
  (LambdaExpr (le)
    (lambda (x* ...) abody)))
\endschemedisplay
}

\noindent
This language has three nonterminals, \scheme{Expr}, \scheme{AssignedBody},
and \scheme{LambdaExpr}.
When more than one nonterminal is specified, one must be selected as the entry
point.
In language \scheme{L8}, the \scheme{Expr} nonterminal is selected as the entry
nonterminal by the \scheme{(entry Expr)} clause.
When the entry clause is not specified, the first nonterminal listed is
implicitly selected as the entry point.

The \scheme{L8} language uses a single terminal meta-variable production,
\scheme{x},
to indicate that a stand-alone \scheme{symbol} is a valid \scheme{Expr}.
In addition, the \scheme{L8} language uses a single nonterminal meta-variable
production, \scheme{le}, to indicate that any \scheme{LambdaExpr} production is
also a valid \scheme{Expr}.
The \scheme{LambdaExpr} is separated from \scheme{Expr} because the
\scheme{letrec} production is now limited to binding \scheme{symbol}s to
\scheme{LambdaExpr}s.

The \scheme{assigned} production of the \scheme{AssignedBody} nonterminal
utilizes a the \scheme{=>} syntax to indicate a pretty unparsing form.
This allows the unparser that is automatically produced by
\scheme{define-language} to generate an S-expression that can be evaluated in
the host Scheme system.
In this case, the \scheme{assigned} from is not a valid Scheme form, so we
simply eliminated the \scheme{assigned} wrapper and list of assigned variables
when unparsing.\footnote{Unparsers can also produce the non-pretty from by
passing both the language form to be unparsed and a \scheme{#f} to indicate
the pretty form should not be used.}

In addition to the nanopass framework providing a syntax for specifying list
structures in a language
production, it is also possible to indicate that a field of a language
production might not contain a (useful) value.
The following language has an example of this:

{\small
\schemedisplay
(define-language Lopt
  (terminals
    (uvar (x))
    (label (l))
    (constant (c))
    (primitive (pr)))
  (Expr (e body)
    x
    (quote c)
    (begin e* ... e)
    (lambda (x* ...) body)
    (let ([x* e*] ...) body)
    (letrec ([x* le*] ...) body)
    (pr e* ...)
    (call (maybe l) (maybe e) e* ...))
  (LambdaExpr (le)
    (lambda (x* ...) body)))
\endschemedisplay
}

\noindent
The \scheme{(maybe l)} field indicates that either a label, \scheme{l}, or
\scheme{#f} will be provided.
Here, \scheme{#f} is a stand-in for bottom, indicating that the value is not
specified.
The \scheme{(maybe e)} field indicates that either an \scheme{Expr} or
\scheme{#f} will be provided.

Instead of using \scheme{(maybe l)} to indicate a label that might be provided,
a \scheme{maybe-label} terminal that serves the same purpose could be added.
It is also possible to eliminate the \scheme{(maybe e)} form, although it
requires the creation of a separate nonterminal that has both an \scheme{e}
production and a production to represent $\bot$, when no \scheme{Expr} is
available.

\section{Extending languages\label{subsec:extended-define-language}}

The first ``pass'' of the example compiler is a simple expander that produces
\scheme{Lsrc} language forms from S-expressions.
The next pass takes the \scheme{Lsrc} language and removes the one-armed-if
expressions, replacing them with a two-armed-if that results in the void value
being produced by the expression when the test clause is false.
code appropriate to construct these constants.
The output grammar of this pass changes just one production of the language,
exchanging potentially complex quoted datum with quoted
constants and making explicit the code to build the constant pairs and vectors when the program
begins execution.

The compiler writer could specify the new language by rewriting the
\scheme{Lsrc} language and replacing the appropriate terminal forms.
Rewriting each language in its full form, however, can result in verbose
source code, particularly in a compiler like the class compiler, which has
nearly 30 different intermediate languages.
Instead, the nanopass framework supports a language extension form.
The output language can be specified as follows:

{\small
\schemedisplay
(define-language L1
  (extends Lsrc)
  (terminals
    (- (primitive (pr)))
    (+ (void+primitive (pr))))
  (Expr (e body)
    (- (if e0 e1))))
\endschemedisplay
}

\noindent
The \scheme{L1} language removes the \scheme{primitive} terminal and replaces it
with the \scheme{void+primitive} terminal.
It also removes the \scheme{(if e0 e1)} production.
A language extension form is indicated by including the \scheme{extends}
clause, in this case \scheme{(extends Lsrc)}, that indicates that this is
an extension of the given base language.
In a language extension, the \scheme{terminals} form now contains
subtraction clauses, in
this case \scheme{(- (primitive (pr)))}, and addition clauses, in this case
\scheme{(+ (void+primitive (pr)))}.
These addition and subtraction clauses can contain one or more terminal
specifiers.
The nonterminal syntax is similarly modified, with the subtraction clause, in
this case \scheme{(- (if e0 e1))}, that indicates productions to be removed
and an addition clause that indicates productions to be added, in this case
no productions are added.

The list of meta-variables indicated for the nonterminal form is also updated
to use the set in the extension language.
It is important to include not only the meta-variables named in the language
extension but also those for terminal and nonterminal forms that will be
maintained from the base language.
Otherwise, these meta-variables will be unbound in the extension language,
leading to errors.

Nonterminals can be removed in an extended language by removing all of the
productions of the nonterminal.
New nonterminals can be added in an extended language by adding the
productions of the new nonterminal.
For instance, language \scheme{L15} removes the \scheme{x}, \scheme{(qoute c)},
and \scheme{(label l)} productions from the \scheme{Expr} nonterminal and
adds the \scheme{SimpleExpr} nonterminal.

{\small
\schemedisplay
  (define-language L15
    (extends L14)
    (Expr (e body)
      (- x
         (quote c)
         (label l)
         (primcall pr e* ...)
         (e e* ...))
      (+ se
         (primcall pr se* ...) => (pr se* ...)
         (se se* ...)))
    (SimpleExpr (se)
      (+ x
         (label l)
         (quote c))))
\endschemedisplay
}

\subsection{The {\tt define-language} form}

The \scheme{define-language} syntax has two related forms. 
The first form fully specifies a new language.
The second form uses the \scheme{extends} clause to indicate that the language
is an extension of an existing base language.

Both forms of \scheme{define-language} start with the same basic syntax:

{\small
\schemedisplay
(define-language \var{language-name} \var{clause} ...)
\endschemedisplay
}

\noindent
where \var{clause} is an \scheme{extension} clause, an \scheme{entry} clause, a
\scheme{terminals} clause, or a nonterminal clause.

\noindent
\textbf{Extension clause.}
The extension clause indicates that the new language is an extension of an existing
language.
This clause slightly changes the syntax of the \scheme{define-language} form
and is described in Section~\ref{subsec:extended-define-language}.

\noindent
\textbf{Entry clause.}
The entry clause specifies which nonterminal is the starting point for this
language.
This information is used when generating passes to determine which nonterminal
should be expected first by the pass.
This default can be overridden in a pass definition, as described in
Section~\ref{sec:pass-syntax}.
The entry clause has the following form:

{\small
\schemedisplay
(entry \var{nonterminal-name})
\endschemedisplay
}

\noindent
where \var{nonterminal-name} corresponds to one of the nonterminals specified
in this language.
Only one entry clause can be specified in a language definition.

\noindent
\textbf{Terminals clause.}
The terminals clause specifies one or more terminals used by the language.
For instance, in the \scheme{Lsrc} example language, the terminals clause
specifies three terminal types: \scheme{uvar}, \scheme{primitive}, and
\scheme{datum}.
The terminals clause has the following form:

{\small
\schemedisplay
(terminals \var{terminal-clause} ...)
\endschemedisplay
}

\noindent
where \var{terminal-clause} has one of the following forms:

{\small
\schemedisplay
(\var{terminal-name} (\var{meta-var} ...))
(=> (\var{terminal-name} (\var{meta-var} ...)) \var{prettifier})
(\var{terminal-name} (\var{meta-var} ...)) => \var{prettifier}
\endschemedisplay
}

Here,
\partopsep=-\parskip
\begin{itemize}
\item \var{terminal-name} is the name of the terminal, and a corresponding
\scheme{\var{terminal-name}?} predicate function exists to determine whether a
Scheme object is of this type when checking the output of a pass,
\item \var{meta-var} is the name of a meta-variable used for referring to this
terminal type in language and pass definitions, and
\item \var{prettifier} is a procedure expression of one argument used
when the language unparser is called in ``pretty'' mode to produce
a pretty, S-expression representation.
\end{itemize}
The final form is syntactic sugar for the form above it.
When the \var{prettifier} is omitted, no processing is done on the terminal
when the unparser runs.

\noindent
\textbf{Nonterminal clause.}
A nonterminal clause specifies the valid productions in a language.
Each nonterminal clause has a name, a set of meta-variables, and a set of
productions.
A nonterminal clause has the following form:

{\small
\schemedisplay
(\var{nonterminal-name} (\var{meta-var} ...)
  \var{production-clause}
  ...)
\endschemedisplay
}

\noindent
where \var{nonterminal-name} is an identifier that names the nonterminal,
\var{meta-var} is the name of a meta-variable used when referring to this
nonterminal in language and pass definitions, and \var{production-clause}
has one of the following forms:

{\small
\schemedisplay
\var{terminal-meta-var}
\var{nonterminal-meta-var}
\var{production-s-expression}
(\var{keyword} . \var{production-s-expression})
\endschemedisplay
}

\noindent
Here,
\begin{itemize}
\item \var{terminal-meta-var} is a terminal meta-variable that is a stand-alone
production for this nonterminal,
\item \var{nonterminal-meta-var} is a nonterminal meta-variable that
indicates that any form allowed by the specified nonterminal is also allowed by
this nonterminal,
\item \var{keyword} is an identifier that must be matched exactly when parsing
an S-expression representation, language input pattern, or language output
template, and
\item \var{production-s-expression} is an S-expression that represents a
pattern for production and has the following form:
\end{itemize}

{\small
\schemedisplay
\var{meta-variable}
(maybe \var{meta-variable})
(\var{production-s-expression} \var{ellipsis})
(\var{production-s-expression} \var{ellipsis} \var{production-s-expression} ... . \var{production-s-expression})
(\var{production-s-expression} . \var{production-s-expression})
() 
\endschemedisplay
}

\noindent
Here,
\begin{itemize}
\item \var{meta-variable} is any terminal or nonterminal meta-variable
extended with an arbitrary number of digits, followed by an arbitrary
combination of \scheme{*}, \scheme{?}, or \scheme{^} characters; for example,
if the meta-variable is \scheme{e}, then \scheme{e1}, \scheme{e*}, \scheme{e?},
and \scheme{e4*?} are all valid meta-variable expressions;
\item \scheme{(maybe \var{meta-variable})} indicates that an element in the
production is either of the type of the meta-variable or bottom (represented by
\scheme{#f}); and
\item \var{ellipsis} is the literal \scheme{...} and indicates that a list of
the \var{production-s-expression} that proceeds it is expected.
\end{itemize}
Thus, a Scheme language form such as \scheme{let} can be represented as a
language production as:

{\small
\schemedisplay
(let ([x* e*] ...) body* ... body)
\endschemedisplay
}

\noindent
where \scheme{let} is the \var{keyword}, \scheme{x*} is a meta-variable that
indicates a list of variables, \scheme{e*} and \scheme{body*} are
meta-variables that each indicate a list of expressions, and \scheme{body} is a
meta-variable that indicates a single expression.

Using the \scheme{maybe} form, something similar to the named-let form could
be represented as follows:

{\small
\schemedisplay
(let (maybe x) ([x* e*] ...) body* ... body)
\endschemedisplay
}

\noindent
although this would be slightly different from the normal named-let form, in that
the non-named form would then need an explicit \scheme{#f} to indicate that no name
was specified.

\subsection{Extensions with the {\tt define-language} form\label{subsubsec:extended-define-language}}

A language defined as an extension of an existing language has a slightly
modified syntax to indicate what should be added to or removed from
the base language to create the new language.
A compiler writer indicates that a language is an extension by using an
extension clause.

\noindent
\textbf{Extension clause.}
The extension clause has the following form:

{\small
\schemedisplay
(extends \var{language-name})
\endschemedisplay
}

\noindent
where \var{language-name} is the name of an already defined language.
Only one extension clause can be specified in a language definition. 

\noindent
\textbf{Entry clause.}
The entry clause does not change syntactically in an extended language.
It can, however, name a nonterminal from the base language that is retained in
the extended language.

\noindent
\textbf{Terminals clause.}
When a language derives from a base language, the \scheme{terminals} clause has the following form:

{\small
\schemedisplay
(terminals \var{extended-terminal-clause} ...)
\endschemedisplay
}

\noindent
where \var{extended-terminal-clause} has one of the following forms:

{\small
\schemedisplay
(+ \var{terminal-clause} ...)
(- \var{terminal-clause} ...)
\endschemedisplay
}

\noindent
where the \var{terminal-clause} uses the syntax for terminals specified in the
non-extended \scheme{terminals} form.
The \scheme{+} form indicates terminals that should be added to the new language.
The \scheme{-} form indicates terminals that should be removed from the list in
the old language when producing the new language.
Terminals not mentioned in a terminals clause will be copied unchanged into the new
language.
Note that adding and removing \var{meta-var}s from a terminal currently
requires removing the terminal type and re-adding it.
This can be done in the same step with a \scheme{terminals} clause, similar to the following:

{\small
\schemedisplay
(terminals
  (- (variable (x)))
  (+ (variable (x y))))
\endschemedisplay
}

\noindent
\textbf{Nonterminal clause.}
When a language extends from a base language, a nonterminal clause has the
following form:

{\small
\schemedisplay
(\var{nonterminal-name} (\var{meta-var} ...)
  \var{extended-production-clause}
  ...)
\endschemedisplay
}

\noindent
where \var{extended-production-clause} has one of the following forms:

{\small
\schemedisplay
(+ \var{production-clause} ...)
(- \var{production-clause} ...)
\endschemedisplay
}

\noindent
The \scheme{+} form indicates nonterminal productions that should be added to
the nonterminal in the new language.
The \scheme{-} form indicates nonterminal productions that should not be
copied from the list of productions for this nonterminal in the base language when
producing the new language.
Productions not mentioned in a nonterminal clause will be copied unchanged into the
nonterminal in the new language.
If a nonterminal has all of its productions removed in a new language, the
nonterminal will be dropped in the new language.
Conversely, new nonterminals can be added by naming the new nonterminal and
using the \scheme{+} form to specify the productions of the new nonterminal.

\subsection{Products of {\tt define-language}}

The \scheme{define-language} form produces the following user-visible bindings:
\begin{itemize}
\item a language definition, bound to the specified \var{language-name};
\item an unparser (named \scheme{unparse-\var{language-name}}) that can be used
to unparse a record-based representation back into an S-expression representation; and
\item a set of predicates that can be used to identify a term of the language
or a term from a specified nonterminal in the language.
\end{itemize}

It also produces the following internal bindings:
\begin{itemize}
\item a meta-parser that can be used by the \scheme{define-pass} macro to parse
the patterns and templates used in passes and
\item a set of record definitions that will be used to represent the language
forms.
\end{itemize}

The \scheme{Lsrc} language, for example, will bind the identifier
\scheme{Lsrc} to the language definition, produce an unparser named
\scheme{unparse-Lsrc}, and create two predicates, \scheme{Lsrc?} and
\scheme{Lsrc-Expr?}.
The language definition is used when the \var{language-name} is specified as
the base of a new language definition and in the definition of a pass.

The \scheme{define-parser} form can also be used to create a simple parser for
parsing S-expressions into language forms as follows:

{\small
\schemedisplay
(define-parser \var{parser-name} \var{language-name})
\endschemedisplay
}

\noindent
The parser does not support backtracking; thus, grammars must be specified, either by specifying a keyword or by having
different length S-expressions so that the productions are unique.

For instance, the following language definition cannot be parsed because all
four of the \scheme{set!} forms have the same keyword and are S-expressions of
the same length:

{\small
\schemedisplay
(define-language Lunparsable 
  (terminals
    (variable (x))
    (binop (binop))
    (integer-32 (int32))
    (integer-64 (int64)))
  (Program (prog)
    (begin stmt* ... stmt))
  (Statement (stmt)
    (set! x0 int64)
    (set! x0 x1)
    (set! x0 (binop x1 int32))
    (set! x0 (binop x1 x2))))
\endschemedisplay
}

\noindent
Instead, the \scheme{Statement} nonterminal must be broken into multiple
nonterminals, as in the following language:

{\small
\schemedisplay
(define-language Lparsable
  (terminals
    (variable (x))
    (binop (binop))
    (integer-32 (int32))
    (integer-64 (int64)))
  (Program (prog)
    (begin stmt* ... stmt))
  (Statement (stmt)
    (set! x rhs))
  (Rhs (rhs)
    x
    int64
    (binop x arg))
  (Argument (arg)
    x
    int32))
\endschemedisplay
}

\section{Defining passes\label{sec:define-pass}}

Passes are used to specify transformations over languages defined by using
\scheme{define-language}.
Before going into the formal details of defining passes, we need to take a look
at a simple pass to convert an input program from the \scheme{Lsrc}
intermediate language to the \scheme{L1} intermediate language.
This pass removes the one-armed-if by making the
result of the \scheme{if} expression explicit when the predicate is false.

We define a pass called \scheme{remove-one-armed-if} to accomplish this
task, without using any of the
catamorphism~\cite{Meijer:1991:FPB:645420.652535} or
autogeneration features of the nanopass framework.
Below, we can see how this feature helps eliminate boilerplate code.

{\small
\schemedisplay
(define-pass remove-one-armed-if : Lsrc (e) -> L1 ()
  (Expr : Expr (e) -> Expr ()
    [(if ,e0 ,e1) `(if ,(Expr e0) ,(Expr e1) (void))]
    [,pr pr]
    [,x x]
    [,c c]
    [(quote ,d) `(quote ,d)]
    [(if ,e0 ,e1 ,e2) `(if ,(Expr e0) ,(Expr e1) ,(Expr e2))]
    [(or ,e* ...) `(or ,(map Expr e*) ...)]
    [(and ,e* ...) `(and ,(map Expr e*) ...)]
    [(not ,e) `(not ,(Expr e))]
    [(begin ,e* ... ,e) `(begin ,(map Expr e*) ... ,(Expr e))]
    [(lambda (,x* ...) ,body* ... ,body)
     `(lambda (,x* ...) ,(map Expr body*) ... ,(Expr body))]
    [(let ([,x* ,e*] ...) ,body* ... ,body)
     `(let ([,x* ,(map Expr e*)] ...)
       ,(map Expr body*) ... ,(Expr body))]
    [(letrec ([,x* ,e*] ...) ,body* ... body)
     `(letrec ([,x* ,(map Expr e*)] ...)
        ,(map Expr body*) ... ,(Expr body))]
    [(set! ,x ,e) `(set! ,x ,(Expr e))]
    [(,e ,e* ...) `(,(Expr e) ,(map Expr e*) ...)])
  (Expr e))
\endschemedisplay
}

\noindent
The pass definition starts with a name (in this case,
\scheme{remove-one-armed-if})
and a signature.
The signature starts with an input-language specifier (e.g. \scheme{Lsrc}),
along with a list of formals.
Here, there is just one formal, \scheme{e}, for the input-language term.
The second part of the signature has an output-language specifier (in this case,
\scheme{L1}), as well as a list of extra return values (in this case, empty).

Following the name and signature, is an optional definitions clause, not
used in this pass.
The \scheme{definitions} clause can contain any Scheme expression valid in a
definition context.

Next, a transformer from the input nonterminal \scheme{Expr} to the output
nonterminal \scheme{Expr} is defined.
The transformer is named \scheme{Expr} and has a signature similar to that
of the pass, with an input-language nonterminal and list of formals followed
by the output-language nonterminal and list of extra-return-value expressions.

The transformer has a clause that processes each production of the \scheme{Expr}
nonterminal.
Each clause consists of an input pattern, an optional \scheme{guard} clause,
and one or more expressions that specify zero or more return values based on the
signature.
The input pattern is derived from the S-expression productions specified
in the input language.
Each variable in the pattern is denoted by unquote (\scheme{,}).
For instance, the clause for the \scheme{set!} production matches the pattern
\scheme{(set! ,x ,e)}, binds \scheme{x} to the \scheme{symbol} specified by the
\scheme{set!} and \scheme{e} to the \scheme{Expr} specified by the
\scheme{set!}.

% I might do this as an asside, if I could figure out how to bend LaTeX to my
% will enough to do that.
The variable names used in pattern bindings are based on the meta-variables
listed in the language definition.
This allows the pattern to be further restricted.
For instance, if we wanted to match only \scheme{set!} forms that had a
variable reference as the RHS, we could specify our pattern as
\scheme{(set! ,x0 ,x1)}, which would be equivalent of using our original
pattern with the \scheme{guard} clause: \scheme{(guard (symbol? e))}.

The output-language expression is constructed using the \scheme{`(set! ,x ,(Expr e))} quasiquoted template.
Here, quasiquote, (\scheme{`}), is rebound to a form that can construct language
forms based on the template, and unquote (\scheme{,}), is used to escape back
into Scheme.
The \scheme{,(Expr e)} thus puts the result of the recursive call of
\scheme{Expr} into the output-language \scheme{(set! x e)} form.

Following the \scheme{Expr} transformer is the body of the pass, which calls
\scheme{Expr} to transform the \scheme{Lsrc} \scheme{Expr} term into an \scheme{L1}
\scheme{Expr} term and wraps the result in a \scheme{let} expression if any
structured  quoted datum are found in the program that is being compiled.

In place of the explicit recursive calls to \scheme{Expr}, the compiler writer
can use the catamorphism syntax to indicate the recurrence, as in the
following version of the pass.

{\small
\schemedisplay
(define-pass remove-one-armed-if : Lsrc (e) -> L1 ()
  (Expr : Expr (e) -> Expr ()
    [(if ,[e0] ,[e1]) `(if ,e0 ,e1 (void))]
    [,pr pr]
    [,x x]
    [,c c]
    [(quote ,d) `(quote ,d)]
    [(if ,[e0] ,[e1] ,[e2]) `(if ,e0 ,e1 ,e2)]
    [(or ,[e*] ...) `(or ,e* ...)]
    [(and ,[e*] ...) `(and ,e* ...)]
    [(not ,[e]) `(not ,e)]
    [(begin ,[e*] ... ,[e]) `(begin ,e* ... ,e)]
    [(lambda (,x* ...) ,[body*] ... ,[body])
     `(lambda (,x* ...) ,body* ... ,body)]
    [(let ([,x* ,[e*]] ...) ,[body*] ... ,[body])
     `(let ([,x* ,e*] ...)
       ,body* ... ,body)]
    [(letrec ([,x* ,[e*]] ...) ,[body*] ... ,[body])
     `(letrec ([,x* ,e*] ...)
        ,body* ... ,body)]
    [(set! ,x ,[e]) `(set! ,x ,e)]
    [(,[e] ,[e*] ...) `(,e ,e* ...)])
  (Expr e))
\endschemedisplay
}

\noindent
Here, the square brackets that wrap the unquoted variable expression in a
pattern indicate that a catamorphism should be applied.
For instance, in the \scheme{set!} clause, the \scheme{,e} from the previous
pass becomes \scheme{,[e]}.
When the catamorphism is included on an element that is followed by an
ellipsis, \scheme{map} is used to process the elements of the list and to construct
the output list.

% another place for this to be an aside with a link down to the
% catamorphism section
Using a catamorphism changes, slightly, the meaning of the meta-variables used
in the pattern matcher.
Instead of indicatinng a input language restriction that must be met, it
indicates an output type that is expected.
In the \scheme{set!} clause example, we use \scheme{e} for both, because our
input language and output language both use \scheme{e} to refer to
their \scheme{Expr} nonterminal.
The nanopass framwork uses the input type and the output type, along with any
additional input values and extra expected return values to determine which
transformer should be called.
In some cases, specifically where a single input nonterminal form is
transformed into an equivalent output nonterminal form, these transformers can
be autogenerated by the framework.

Using catamorphisms helps to make the pass more succinct, but there is still
boilerplate code in the pass that the framework can fill in for the compiler
writer.
Several clauses simply match the input-language production and generate a matching
output-language production (modulo the catamorphisms for nested \scheme{Expr} forms).
Because the input and output languages are defined, the \scheme{define-pass}
macro can automatically generate these clauses.
Thus, the same functionality can be expressed as follows:


{\small
\schemedisplay
(define-pass remove-one-armed-if : Lsrc (e) -> L1 ()
  (Expr : Expr (e) -> Expr ()
    [(if ,[e0] ,[e1]) `(if ,e0 ,e1 (void))]))
\endschemedisplay
}

\noindent
In this version of the pass, only the one-armed-\scheme{if} form 
is explicitly processed.
The \scheme{define-pass} form automatically generates the other clauses.
Although all three versions of this pass perform the same task, the final form is the
closest to the initial intention of replacing just the one-armed-if form with a two-armed-if.

In addition to \scheme{define-pass} autogenerating the clauses of a transformer, \scheme{define-pass} can also
autogenerate the transformers for nonterminals that must be traversed but are
otherwise unchanged in a pass.
For instance, one of the passes in the class compiler removes complex
expressions from the right-hand side of the \scheme{set!} form.
At this point in the compiler, the language has several nonterminals:

{\small
\schemedisplay
(define-language L18
  (entry Program)
  (terminals
    (integer-64 (i))
    (effect+internal-primitive (epr))
    (non-alloc-value-primitive (vpr))
    (symbol (x l))
    (predicate-primitive (ppr))
    (constant (c)))
  (Program (prog)
    (labels ([l* le*] ...) l))
  (SimpleExpr (se)
    x
    (label l)
    (quote c))
  (Value (v body)
    (alloc i se)
    se
    (if p0 v1 v2)
    (begin e* ... v)
    (primcall vpr se* ...)
    (se se* ...))
  (Effect (e)
    (set! x v)
    (nop)
    (if p0 e1 e2)
    (begin e* ... e)
    (primcall epr se* ...)
    (se se* ...))
  (Predicate (p)
    (true)
    (false)
    (if p0 p1 p2)
    (begin e* ... p)
    (primcall ppr se* ...))
  (LocalsBody (lbody)
    (locals (x* ...) body))
  (LambdaExpr (le)
    (lambda (x* ...) lbody)))
\endschemedisplay
}

\noindent
The pass, however, is only interested in the \scheme{set!} form and the
\scheme{Value} form in the right-hand-side position of the \scheme{set!} form.
Relying on the autogeneration of transformers, this pass can be written as:

{\small
\schemedisplay
(define-pass flatten-set! : L18 (e) -> L19 ()
  (SimpleExpr : SimpleExpr (se) -> SimpleExpr ())
  (Effect : Effect (e) -> Effect ()
    [(set! ,x ,v) (flatten v x)])
  (flatten : Value (v x) -> Effect ()
    [,se `(set! ,x ,(SimpleExpr se))]
    [(primcall ,vpr ,[se*] ...) `(set! ,x (primcall ,vpr ,se* ...))]
    [(alloc ,i ,[se]) `(set! ,x (alloc ,i ,se))]
    [(,[se] ,[se*] ...) `(set! ,x (,se ,se* ...))]))
\endschemedisplay
}

\noindent
Here, the \scheme{Effect} transformer has just one clause for matching the
\scheme{set!} form.
The \scheme{flatten} transformer is called to produce the final \scheme{Effect}
form.
The \scheme{flatten} transformer, in turn, pushes the \scheme{set!} form into
the \scheme{if} and \scheme{begin} forms and processes the contents of these
forms, which produces a final \scheme{Effect} form.
Note that the \scheme{if} and \scheme{begin} forms do not need to be provided
by the compiler writer.
This is because the input and output language provide enough structure that the
nanopass framework can automatically generate the appropriate clauses.
In the case of \scheme{begin} it will push the \scheme{set!} form into the
final, value producing, expression of the \scheme{begin} form.
In the case of the \scheme{if} it will push the \scheme{set!} form into both
the consquent and alternative of the if form, setting the variable at the
final, value producing expression on both possible execution paths.
The \scheme{define-pass} macro autogenerates transformers for \scheme{Program},
\scheme{LambdaExpr}, \scheme{LocalsBody}, \scheme{Value}, and
\scheme{Predicate} that recur through the input-language forms and produce the
output-language forms.
The \scheme{SimpleExpr} transformer only needs to be written to give a name to
the transformer so that it can be called by \scheme{flatten}.

It is sometimes necessary to pass more information than just
the language term to a transformer.
The transformer syntax allows extra formals to be named to support passing this information.
For example, in the pass from the scheme to C compiler that converts the
\scheme{closures} form into explicit calls to procedure primitives, the closure
pointer, \scheme{cp}, and the list of free variables, \scheme{free*}, are passed
to the \scheme{Expr} transformer.

{\small
\schemedisplay
(define-pass expose-closure-prims : L12 (e) -> L13 ()
  (Expr : Expr (e [cp #f] [free* '()]) -> Expr ()
    (definitions
      (define handle-closure-ref
        (lambda (x cp free*)
          (let loop ([free* free*] [i 0])
            (cond
              [(null? free*) x]
              [(eq? x (car free*)) `(primcall closure-ref ,cp (quote ,i))]
              [else (loop (cdr free*) (fx+ i 1))]))))
      (define build-closure-set*
        (lambda (x* l* f** cp free*)
          (fold-left
            (lambda (e* x l f*)
              (let loop ([f* f*] [i 0] [e* e*])
                (if (null? f*)
                    (cons `(primcall closure-code-set! ,x (label ,l)) e*)
                    (loop (cdr f*) (fx+ i 1)
                      (cons `(primcall closure-data-set! ,x (quote ,i)
                               ,(handle-closure-ref (car f*) cp free*))
                        e*)))))
            '()
            x* l* f**))))
    [(closures ([,x* ,l* ,f** ...] ...)
       (labels ([,l2* ,[le*]] ...) ,[body]))
     (let ([size* (map length f**)])
       `(let ([,x* (primcall make-closure (quote ,size*))] ...)
          (labels ([,l2* ,le*] ...)
            (begin
              ,(build-closure-set* x* l* f** cp free*) ...
              ,body))))]
    [,x (handle-closure-ref x cp free*)]
    [((label ,l) ,[e*] ...) `((label ,l) ,e* ...)]
    [(,[e] ,[e*] ...) `((primcall closure-code ,e) ,e* ...)])
 (LabelsBody : LabelsBody (lbody) -> Expr ())
 (LambdaExpr : LambdaExpr (le) -> LambdaExpr ()
   [(lambda (,x ,x* ...) (free (,f* ...) ,[body x f* -> body]))
    `(lambda (,x ,x* ...) ,body)]))
\endschemedisplay
}

\noindent
The catamorphism and clause autogeneration facilities are also aware of the extra
formals expected by transformers.
In a catamorphism, this means that extra arguments need not be specified in
the catamorphism, if the formals are available in the transformer.
For instance, in the \scheme{Expr} transformer,
the catamorphism specifies only the binding of the output \scheme{Expr} form,
and \scheme{define-pass} matches the name of the formal to the transformer with the
expected argument.
In the \scheme{LambdaExpr} transformer, the extra arguments need to be
specified, both because they are not available as a formal of the transformer
and because the values change at the \scheme{LambdaExpr} boundary.
Autogenerated clauses in \scheme{Expr} also call the
\scheme{Expr} transformer with the extra arguments from the formals.

The \scheme{expose-closure-prims} pass also specifies default values for the
extra arguments passed to the \scheme{Expr} transformer.
It defaults the \scheme{cp} variable to \scheme{#f} and the \scheme{free*}
variable to the empty list.
The default values will only be used in calls to the \scheme{Expr} transformer
when the no other value is available.
In this case, this happen only when the \scheme{Expr} transformer is first
called in the body of the pass.
This is consistent with the body of the program, which cannot contain any free
variables and hence does not need a closure pointer.
Once we begin processing within the body of a \scheme{lambda} we then have a
closure pointer, with the list of free variables, if any.

Sometimes it is also necessary for a pass to return more than one value.
The nanopass framework relies upon Scheme's built-in functionality for dealing
with returning of multiple return values.
To inform the nanopass framework that a given transformer is returning more
than one value, we use the signature to tell the framework both how many values
we are expecting to return, and what the default values should be when a clause
is autogenerated.
For instance, the \scheme{uncover-free} pass returns two values, the language
form and the list of free variables.

{\small
\schemedisplay
(define-pass uncover-free : L10 (e) -> L11 ()
  (Expr : Expr (e) -> Expr (free*)
    [(quote ,c) (values `(quote ,c) '())]
    [,x (values x (list x))]
    [(let ([,x* ,[e* free**]] ...) ,[e free*])
     (values `(let ([,x* ,e*] ...) ,e)
       (apply union (difference free* x*) free**))]
    [(letrec ([,x* ,[le* free**]] ...) ,[body free*])
     (values `(letrec ([,x* ,le*] ...) ,body)
       (difference (apply union free* free**) x*))]
    [(if ,[e0 free0*] ,[e1 free1*] ,[e2 free2*])
     (values `(if ,e0 ,e1 ,e2) (union free0* free1* free2*))]
    [(begin ,[e* free**] ... ,[e free*])
     (values `(begin ,e* ... ,e) (apply union free* free**))]
    [(primcall ,pr ,[e* free**]...)
     (values `(primcall ,pr ,e* ...) (apply union free**))]
    [(,[e free*] ,[e* free**] ...)
     (values `(,e ,e* ...) (apply union free* free**))])
  (LambdaExpr : LambdaExpr (le) -> LambdaExpr (free*)
    [(lambda (,x* ...) ,[body free*])
     (let ([free* (difference free* x*)])
       (values `(lambda (,x* ...) (free (,free* ...) ,body)) free*))])
  (let-values ([(e free*) (Expr e)])
    (unless (null? free*) (error who "found unbound variables" free*))
    e))
\endschemedisplay
}

Transformers can also be written that handle terminals instead of nonterminals.
Because terminals have no structure, the body of such transformers is simply a
Scheme expression.
The Scheme to C compiler does not make use of this feature, but we could
imagine a pass where references to variables are replaced with already
specified locations, such as the following pass:

{\small
\schemedisplay
(define-pass replace-variable-refereces : L23 (x) -> L24 ()
  (uvar-reg-fv : symbol (x env) -> location ()
    (cond [(and (uvar? x) (assq x env)) => cdr] [else x]))
  (SimpleExpr : SimpleExpr (x env) -> Triv ())
  (Rhs : Rhs (x env) -> Rhs ())
  (Pred : Pred (x env) -> Pred ())
  (Effect : Effect (x env) -> Effect ())
  (Value : Value (x env) -> Value ())
  (LocalsBody : LocalsBody (x) -> Value ()
    [(finished ([,x* ,loc*] ...) ,vbody) (Value vbody (map cons x* loc*))]))
\endschemedisplay
}

\noindent
The two interesting parts of this pass are the \scheme{LocalsBody} transformer
that creates the environment that maps variables to locations and the
\scheme{uvar-reg-fv} transformer that replaces variables with the appropriate
location.
In this pass, transformers cannot be autogenerated because extra arguments are
needed, and the nanopass framework only autogenerates transformers without extra
arguments or return values.
The autogeneration is limited to help reign in some of the unpredictable
behavior that can result from autogenerated transformers.

Passes can also be written that do not take a language form but that produce a
language form.
The initial parser for the Scheme to C compiler is a good example of this.
It expects an S-expression that conforms to an input grammar for the subset of
Scheme supported by the compiler. 

{\small
\schemedisplay
(define-pass parse-and-rename : * (e) -> Lsrc ()
  (definitions
    (define process-body
      (lambda (who env body* f)
        (when (null? body*) (error who "invalid empty body"))
        (let loop ([body (car body*)] [body* (cdr body*)] [rbody* '()])
          (if (null? body*)
              (f (reverse rbody*) (Expr body env))
              (loop (car body*) (cdr body*)
                (cons (Expr body env) rbody*))))))
    (define vars-unique?
      (lambda (fmls)
        (let loop ([fmls fmls])
          (or (null? fmls)
              (and (not (memq (car fmls) (cdr fmls)))
                   (loop (cdr fmls)))))))
    (define unique-vars
      (lambda (env fmls f)
        (unless (vars-unique? fmls)
          (error 'unique-vars "invalid formals" fmls))
        (let loop ([fmls fmls] [env env] [rufmls '()])
          (if (null? fmls)
              (f env (reverse rufmls))
              (let* ([fml (car fmls)] [ufml (unique-var fml)])
                (loop (cdr fmls) (cons (cons fml ufml) env)
                  (cons ufml rufmls)))))))
    (define process-bindings
      (lambda (rec? env bindings f)
        (let loop ([bindings bindings] [rfml* '()] [re* '()])
          (if (null? bindings)
              (unique-vars env rfml*
                (lambda (new-env rufml*)
                  (let ([env (if rec? new-env env)])
                    (let loop ([rufml* rufml*]
                                [re* re*]
                                [ufml* '()]
                                [e* '()])
                      (if (null? rufml*)
                          (f new-env ufml* e*)
                          (loop (cdr rufml*) (cdr re*)
                            (cons (car rufml*) ufml*)
                            (cons (Expr (car re*) env) e*)))))))
              (let ([binding (car bindings)])
                (loop (cdr bindings) (cons (car binding) rfml*)
                  (cons (cadr binding) re*)))))))
    (define Expr*
      (lambda (e* env)
        (map (lambda (e) (Expr e env)) e*)))
    (with-output-language (Lsrc Expr)
      (define build-primitive
        (lambda (as)
          (let ([name (car as)] [argc (cdr as)])
            (cons name
              (if (< argc 0)
                  (error who
                    "primitives with arbitrary counts are not currently supported"
                    name)
                  (lambda (env . e*)
                    (if (= (length e*) argc)
                        `(,name ,(Expr* e* env) ...)
                        (error name "invalid argument count"
                          (cons name e*)))))))))
      (define initial-env
        (cons*
          (cons 'quote (lambda (env d)
                         (unless (datum? d)
                           (error 'quote "invalid datum" d))
                         `(quote ,d)))
          (cons 'if (case-lambda
                      [(env e0 e1) `(if ,(Expr e0 env) ,(Expr e1 env))]
                      [(env e0 e1 e2)
                        `(if ,(Expr e0 env) ,(Expr e1 env) ,(Expr e2 env))]
                      [x (error 'if (if (< (length x) 3)
                                        "too few arguments"
                                        "too many arguments")
                           x)]))
          (cons 'or (lambda (env . e*) `(or ,(Expr* e* env) ...)))
          (cons 'and (lambda (env . e*) `(and ,(Expr* e* env) ...)))
          (cons 'not (lambda (env e) `(not ,(Expr e env))))
          (cons 'begin (lambda (env . e*)
                         (process-body env e*
                           (lambda (e* e)
                             `(begin ,e* ... ,e)))))
          (cons 'lambda (lambda (env fmls . body*)
                          (unique-vars env fmls
                            (lambda (env fmls)
                              (process-body 'lambda env body*
                                (lambda (body* body)
                                  `(lambda (,fmls ...)
                                     ,body* ... ,body)))))))
          (cons 'let (lambda (env bindings . body*)
                       (process-bindings #f env bindings
                         (lambda (env x* e*)
                           (process-body 'let env body*
                             (lambda (body* body)
                               `(let ([,x* ,e*] ...) ,body* ... ,body)))))))
          (cons 'letrec (lambda (env bindings . body*)
                          (process-bindings #t env bindings
                            (lambda (env x* e*)
                              (process-body 'letrec env body*
                                (lambda (body* body)
                                  `(letrec ([,x* ,e*] ...)
                                     ,body* ... ,body)))))))
          (cons 'set! (lambda (env x e)
                        (cond
                          [(assq x env) =>
                            (lambda (as)
                              (let ([v (cdr as)])
                                (if (symbol? v)
                                    `(set! ,v ,(Expr e env))
                                    (error 'set! "invalid syntax"
                                      (list 'set! x e)))))]
                          [else (error 'set! "set to unbound variable"
                                  (list 'set! x e))])))
          (map build-primitive user-prims)))
      ;;; App - helper for handling applications.
      (define App
        (lambda (e env)
          (let ([e (car e)] [e* (cdr e)])
            `(,(Expr e env) ,(Expr* e* env) ...))))))
  (Expr : * (e env) -> Expr ()
    (cond
      [(pair? e)
       (cond
         [(assq (car e) env) =>
          (lambda (as)
            (let ([v (cdr as)])
              (if (procedure? v)
                  (apply v env (cdr e))
                  (App e env))))]
         [else (App e env)])]
      [(symbol? e)
       (cond
         [(assq e env) =>
          (lambda (as)
            (let ([v (cdr as)])
              (cond
                [(symbol? v) v]
                [(primitive? e) e]
                [else (error who "invalid syntax" e)])))]
         [else (error who "unbound variable" e)])]
      [(constant? e) e]
      [else (error who "invalid expression" e)]))
  (Expr e initial-env))
\endschemedisplay
}

\noindent
The \scheme{parse-and-rename} pass is structured similarly to a simple expander with
keywords and primitives.\footnote{It could easily be extended to handle simple macros, in this case, just the fixed \scheme{and} macro,
\scheme{or} macro, and \scheme{not} macro would be available.}
It also performs syntax checking to ensure that the input grammar conforms to
the expected input grammar.
Finally, it produces an \scheme{Lsrc} language term that represents the Scheme
program to be compiled.

In the pass syntax, the \scheme{*} in place of the input-language name indicates
that no input-language term should be expected.
The \scheme{Expr} and \scheme{Application} transformers do not have pattern
matching clauses, as the input could be of any form.
The quasiquote is, however, rebound because an output language is specified.

It can also be useful to create passes without an output language.
The final pass of the Scheme to C compiler is the code generator that emits C
code.

{\small
\schemedisplay
(define-pass generate-c : L22 (e) -> * ()
  (definitions
    (define string-join
      (lambda (str* jstr)
        (cond
          [(null? str*) ""]
          [(null? (cdr str*)) (car str*)]
          [else (string-append (car str*) jstr (string-join (cdr str*) jstr))])))
    (define symbol->c-id
      (lambda (sym)
        (let ([ls (string->list (symbol->string sym))])
          (if (null? ls)
              "_"
              (let ([fst (car ls)])
                (list->string
                  (cons
                    (if (char-alphabetic? fst) fst #\_)
                    (map (lambda (c)
                           (if (or (char-alphabetic? c)
                                   (char-numeric? c))
                               c
                               #\_))
                      (cdr ls)))))))))
    (define format-function-header
      (lambda (l x*)
        (format "ptr ~a(~a)" l
          (string-join
            (map
              (lambda (x)
                (format "ptr ~a" (symbol->c-id x)))
              x*)
            ", "))))
    (define format-label-call
      (lambda (l se*)
        (format "  ~a(~a)" (symbol->c-id l)
          (string-join
            (map (lambda (se)
                   (format "(ptr)~a" (format-simple-expr se)))
              se*)
            ", "))))
    (define format-general-call
      (lambda (se se*)
        (format "((ptr (*)(~a))~a)(~a)"
          (string-join (make-list (length se*) "ptr") ", ")
          (format-simple-expr se)
          (string-join
            (map (lambda (se)
                   (format "(ptr)~a" (format-simple-expr se)))
              se*)
            ", "))))
    (define format-binop
      (lambda (op se0 se1)
        (format "((long)~a ~a (long)~a)"
          (format-simple-expr se0)
          op
          (format-simple-expr se1))))
    (define format-set!
      (lambda (x rhs)
        (format "~a = (ptr)~a" (symbol->c-id x) (format-rhs rhs)))))
  (emit-function-decl : LambdaExpr (le l) -> * ()
    [(lambda (,x* ...) ,lbody)
     (printf "~a;~%" (format-function-header l x*))])
  (emit-function-def : LambdaExpr (le l) -> * ()
    [(lambda (,x* ...) ,lbody)
     (printf "~a {~%" (format-function-header l x*))
     (emit-function-body lbody)
      (printf "}~%~%")])
  (emit-function-body : LocalsBody (lbody) -> * ()
    [(locals (,x* ...) ,body)
     (for-each (lambda (x) (printf "  ptr ~a;~%" (symbol->c-id x))) x*)
     (emit-value body x*)])
  (emit-value : Value (v locals*) -> * ()
    [(if ,p0 ,v1 ,v2)
     (printf "  if (~a) {~%" (format-predicate p0))
     (emit-value v1 locals*)
     (printf "  } else {~%")
     (emit-value v2 locals*)
     (printf "  }~%")]
    [(begin ,e* ... ,v)
     (for-each emit-effect e*)
     (emit-value v locals*)]
    [,rhs (printf "  return (ptr)~a;\n" (format-rhs rhs))])
  (format-predicate : Predicate (p) -> * (str)
    [(if ,p0 ,p1 ,p2)
     (format "((~a) ? (~a) : (~a))"
       (format-predicate p0)
       (format-predicate p1)
       (format-predicate p2))]
    [(<= ,se0 ,se1) (format-binop "<=" se0 se1)]
    [(< ,se0 ,se1) (format-binop "<" se0 se1)]
    [(= ,se0 ,se1) (format-binop "==" se0 se1)]
    [(true) "1"]
    [(false) "0"]
    [(begin ,e* ... ,p)
     (string-join
       (fold-right (lambda (e s*) (cons (format-effect e) s*))
         (list (format-predicate p)) e*)
       ", ")])
  (format-effect : Effect (e) -> * (str)
    [(if ,p0 ,e1 ,e2)
     (format "((~a) ? (~a) : (~a))"
       (format-predicate p0)
       (format-effect e1)
       (format-effect e2))]
    [((label ,l) ,se* ...) (format-label-call l se*)]
    [(,se ,se* ...) (format-general-call se se*)]
    [(set! ,x ,rhs) (format-set! x rhs)]
    [(nop) "0"]
    [(begin ,e* ... ,e)
     (string-join
       (fold-right (lambda (e s*) (cons (format-effect e) s*))
         (list (format-effect e)) e*)
       ", ")]
    [(mset! ,se0 ,se1? ,i ,se2)
     (if se1?
         (format "((*((ptr*)((long)~a + (long)~a + ~d))) = (ptr)~a)"
           (format-simple-expr se0) (format-simple-expr se1?)
           i (format-simple-expr se2))
         (format "((*((ptr*)((long)~a + ~d))) = (ptr)~a)"
           (format-simple-expr se0) i (format-simple-expr se2)))])
  (format-simple-expr : SimpleExpr (se) -> * (str)
    [,x (symbol->c-id x)]
    [,i (number->string i)]
    [(label ,l) (format "(*~a)" (symbol->c-id l))]
    [(logand ,se0 ,se1) (format-binop "&" se0 se1)]
    [(shift-right ,se0 ,se1) (format-binop ">>" se0 se1)]
    [(shift-left ,se0 ,se1) (format-binop "<<" se0 se1)]
    [(divide ,se0 ,se1) (format-binop "/" se0 se1)]
    [(multiply ,se0 ,se1) (format-binop "*" se0 se1)]
    [(subtract ,se0 ,se1) (format-binop "-" se0 se1)]
    [(add ,se0 ,se1) (format-binop "+" se0 se1)]
    [(mref ,se0 ,se1? ,i) 
     (if se1?
         (format "(*((ptr)((long)~a + (long)~a + ~d)))"
           (format-simple-expr se0)
           (format-simple-expr se1?) i)
         (format "(*((ptr)((long)~a + ~d)))" (format-simple-expr se0) i))])
  ;; prints expressions in effect position into C statements
  (emit-effect : Effect (e) -> * ()
    [(if ,p0 ,e1 ,e2)
     (printf "  if (~a) {~%" (format-predicate p0))
     (emit-effect e1)
     (printf "  } else {~%")
     (emit-effect e2)
     (printf "  }~%")]
    [((label ,l) ,se* ...) (printf "  ~a;\n" (format-label-call l se*))]
    [(,se ,se* ...) (printf "  ~a;\n" (format-general-call se se*))]
    [(set! ,x ,rhs) (printf "  ~a;\n" (format-set! x rhs))]
    [(nop) (if #f #f)]
    [(begin ,e* ... ,e)
     (for-each emit-effect e*)
     (emit-effect e)]
    [(mset! ,se0 ,se1? ,i ,se2)
     (if se1?
         (printf "(*((ptr*)((long)~a + (long)~a + ~d))) = (ptr)~a;\n"
           (format-simple-expr se0) (format-simple-expr se1?)
           i (format-simple-expr se2))
         (printf "(*((ptr*)((long)~a + ~d))) = (ptr)~a;\n"
           (format-simple-expr se0) i (format-simple-expr se2)))])
  ;; formats the right-hand side of a set! into a C expression
  (format-rhs : Rhs (rhs) -> * (str)
    [((label ,l) ,se* ...) (format-label-call l se*)]
    [(,se ,se* ...) (format-general-call se se*)]
    [(alloc ,i ,se)
     (if (use-boehm?)
         (format "(ptr)((long)GC_MALLOC(~a) + ~dl)"
           (format-simple-expr se) i)
         (format "(ptr)((long)malloc(~a) + ~dl)"
           (format-simple-expr se) i))]
    [,se (format-simple-expr se)])
  ;; emits a C program for our progam expression
  (Program : Program (p) -> * ()
    [(labels ([,l* ,le*] ...) ,l)
     (let ([l (symbol->c-id l)] [l* (map symbol->c-id l*)])
       (define-syntax emit-include
         (syntax-rules ()
           [(_ name) (printf "#include <~s>\n" 'name)]))
       (define-syntax emit-predicate
         (syntax-rules ()
           [(_ PRED_P mask tag)
             (emit-c-macro PRED_P (x) "(((long)x & ~d) == ~d)" mask tag)]))
       (define-syntax emit-eq-predicate
         (syntax-rules ()
           [(_ PRED_P rep)
             (emit-c-macro PRED_P (x) "((long)x == ~d)" rep)]))
       (define-syntax emit-c-macro
         (lambda (x)
           (syntax-case x()
             [(_ NAME (x* ...) fmt args ...)
               #'(printf "#define ~s(~a) ~a\n" 'NAME
                   (string-join (map symbol->string '(x* ...)) ", ")
                   (format fmt args ...))])))
       ;; the following printfs output the tiny C runtime we are using
       ;; to wrap the result of our compiled Scheme program.
       (emit-include stdio.h)
       (if (use-boehm?)
           (emit-include gc.h)
           (emit-include stdlib.h))
       (emit-predicate FIXNUM_P fixnum-mask fixnum-tag)
       (emit-predicate PAIR_P pair-mask pair-tag)
       (emit-predicate BOX_P box-mask box-tag)
       (emit-predicate VECTOR_P vector-mask vector-tag)
       (emit-predicate PROCEDURE_P closure-mask closure-tag)
       (emit-eq-predicate TRUE_P true-rep)
       (emit-eq-predicate FALSE_P false-rep)
       (emit-eq-predicate NULL_P null-rep)
       (emit-eq-predicate VOID_P void-rep)
       (printf "typedef long* ptr;\n")
       (emit-c-macro FIX (x) "((long)x << ~d)" fixnum-shift)
       (emit-c-macro UNFIX (x) "((long)x >> ~d)" fixnum-shift)
       (emit-c-macro UNBOX (x) "((ptr)*((ptr)((long)x - ~d)))" box-tag)
       (emit-c-macro VECTOR_LENGTH_S (x) "((ptr)*((ptr)((long)x - ~d)))" vector-tag)
       (emit-c-macro VECTOR_LENGTH_C (x) "UNFIX(VECTOR_LENGTH_S(x))")
       (emit-c-macro VECTOR_REF (x i) "((ptr)*((ptr)((long)x - ~d + ((i+1) * ~d))))"
         vector-tag word-size)
       (emit-c-macro CAR (x) "((ptr)*((ptr)((long)x - ~d)))" pair-tag)
       (emit-c-macro CDR (x) "((ptr)*((ptr)((long)x - ~d + ~d)))" pair-tag word-size)
       (printf "void print_scheme_value(ptr x) {\n")
       (printf "  long i, veclen;\n")
       (printf "  ptr p;\n")
       (printf "  if (TRUE_P(x)) {\n")
       (printf "    printf(\"#t\");\n")
       (printf "  } else if (FALSE_P(x)) {\n")
       (printf "    printf(\"#f\");\n")
       (printf "  } else if (NULL_P(x)) {\n")
       (printf "    printf(\"()\");\n")
       (printf "  } else if (VOID_P(x)) {\n")
       (printf "    printf(\"(void)\");\n")
       (printf "  } else if (FIXNUM_P(x)) {\n")
       (printf "    printf(\"%ld\", UNFIX(x));\n")
       (printf "  } else if (PAIR_P(x)) {\n")
       (printf "    printf(\"(\");\n")
       (printf "    for (p = x; PAIR_P(p); p = CDR(p)) {\n")
       (printf "      print_scheme_value(CAR(p));\n")
       (printf "      if (PAIR_P(CDR(p))) { printf(\" \"); }\n")
       (printf "    }\n")
       (printf "    if (NULL_P(p)) {\n")
       (printf "      printf(\")\");\n")
       (printf "    } else {\n")
       (printf "      printf(\" . \");\n")
       (printf "      print_scheme_value(p);\n")
       (printf "      printf(\")\");\n")
       (printf "    }\n")
       (printf "  } else if (BOX_P(x)) {\n")
       (printf "    printf(\"#(box \");\n")
       (printf "    print_scheme_value(UNBOX(x));\n")
       (printf "    printf(\")\");\n")
       (printf "  } else if (VECTOR_P(x)) {\n")
       (printf "    veclen = VECTOR_LENGTH_C(x);\n")
       (printf "    printf(\"#(\");\n")
       (printf "    for (i = 0; i < veclen; i += 1) {\n")
       (printf "      print_scheme_value(VECTOR_REF(x,i));\n")
       (printf "      if (i < veclen) { printf(\" \"); } \n")
       (printf "    }\n")
       (printf "    printf(\")\");\n")
       (printf "  } else if (PROCEDURE_P(x)) {\n")
       (printf "    printf(\"#(procedure)\");\n")
       (printf "  }\n")
       (printf "}\n")
       (map emit-function-decl le* l*)
       (map emit-function-def le* l*)
       (printf "int main(int argc, char * argv[]) {\n")
       (printf "  print_scheme_value(~a());\n" l)
       (printf "  printf(\"\\n\");\n")
       (printf "  return 0;\n")
       (printf "}\n"))]))
\endschemedisplay
}

\noindent
Again, a \scheme{*} is used to indicate that there is no language form in this
case for the output language.
The C code is printed to the standard output port.
Thus, there is no need
for any return value from this pass.

Passes can also return a value that is not a language form.
For instance, we could write the \scheme{simple?} predicate from \scheme{purify-letrec} pass as its own pass, rather than using the \scheme{nanopass-case} form.
It would look something like the following:

{\small
\schemedisplay
(define-pass simple? : (L8 Expr) (e bound* assigned*) -> * (bool)
  (simple? : Expr (e) -> * (bool)
    [(quote ,c) #t]
    [,x (not (or (memq x bound*) (memq x assigned*)))]
    [(primcall ,pr ,e* ...)
     (and (effect-free-prim? pr) (for-all simple? e*))]
    [(begin ,e* ... ,e) (and (for-all simple? e*) (simple? e))]
    [(if ,e0 ,e1 ,e2) (and (simple? e0) (simple? e1) (simple? e2))]
    [else #f])
  (simple? e))
\endschemedisplay
}

\noindent
Here, the extra return value is indicated as \scheme{bool}.
The \scheme{bool} here is used to indicate to \scheme{define-pass} that an
extra value is being returned.
Any expression can be used in this position.
In this case, the \scheme{bool} identifier will simply be an unbound variable
if it is ever manifested.
It is not manifested in this case, however, because the body is explicitly
specified; thus, no code will be autogenerated for the body of the pass.

\subsection{The {\tt define-pass} syntactic form\label{sec:pass-syntax}}

The \scheme{define-pass} form has the following syntax.

{\small
\schemedisplay
(define-pass \var{name} : \var{lang-specifier} (\var{fml} ...) -> \var{lang-specifier} (\var{extra-return-val-expr} ...)
  \var{definitions-clause}
  \var{transformer-clause} ...
  \var{body-expr} ...)
\endschemedisplay
}

\noindent
where \var{name} is an identifier to use as the name for the procedure
definition.
The \var{lang-specifier} has one of the following forms:

{\small
\schemedisplay
*
\var{lang-name}
(\var{lang-name} \var{nonterminal-name})
\endschemedisplay
}

\noindent
where
\begin{itemize}
\item \var{lang-name} refers to a language defined with the
\scheme{define-language} form, and
\item \var{nonterminal-name} refers to a nonterminal named within the language
definition.
\end{itemize}
When the \scheme{*} form is used as the input \var{lang-specifier}, it indicates
that the pass does not expect an input-language term.
When there is no input language, the transformers within the pass do not have
clauses with pattern matches because, without an input language, the \scheme{define-pass} macro
does not know what the structure of the input term will be.
When the \scheme{*} form is used as the output \var{lang-specifier}, it
indicates that the pass does not produce an output-language term and should not
be checked.
When there is no output language, the transformers within the pass do not bind
\scheme{quasiquote}, and there are no templates on the right-hand side of the
transformer matches.
It is possible to use the \scheme{*} specifier for both the input and output
\var{lang-specifier}.
This effectively turns the pass, and the transformers contained within it, into an
ordinary Scheme function.

When the \var{lang-name} form is used as the input \var{lang-specifier}, it
indicates that the pass expects an input-language term that is one of the
productions from the entry nonterminal.
When the \var{lang-name} form is used as the output \var{lang-specifier}, it
indicates that the pass expects that an output-language term will be produced and
checked to be one of the records that represents a production of the entry
nonterminal.

When the (\var{lang-name} \var{nonterminal-name}) form is used as the
input-language specifier, it indicates that the input-language term will be a
production from the specified nonterminal in the specified input language.
When the (\var{lang-name} \var{nonterminal-name}) form is used as the
output-language specifier, it indicates that the pass will produce an output
production from the specified nonterminal of the specified output language.

The \var{fml} is a Scheme identifier, and if the input \var{lang-specifier} is
not \scheme{*}, the first \var{fml} refers to the input-language term.

The \var{extra-return-val-expr} is any valid Scheme expression that is valid in value context.
These expressions are scoped within the binding of the identifiers named as
\var{fml}s.

The optional \var{definitions-clause} has the following form:

{\small
\schemedisplay
(definitions \var{scheme-definition} ...)
\endschemedisplay
}

\noindent
where \var{scheme-definition} is any Scheme expression that can be used in
definition context.
Definitions in the \var{definitions-clause} are in the same lexical scope as
the transformers, which means that procedures and macros defined in the
\var{definitions-clause} can refer to any transformer named in a
\var{transformer-clause}.

The \var{definitions-clause} is followed by zero or more
\var{transformer-clauses}s of the following form:

{\small
\schemedisplay
(\var{name} : \var{nt-specifier} (\var{fml-expr} ...) -> \var{nt-specifier} (\var{extra-return-val-expr} ...)
  \var{definitions-clause}?
  \var{transformer-body})
\endschemedisplay
}

\noindent
where \var{name} is a Scheme identifier that can be used to refer to the transformer within the pass.
The input \var{nt-specifier} is one of the following two forms:

{\small
\schemedisplay
*
\var{nonterminal-name}
\endschemedisplay
}

\noindent
When the \scheme{*} form is used as the input nonterminal, it indicates that no
input nonterminal form is expected and that the body of the
\var{transformer-body} will not contain pattern matching clauses.
When the \scheme{*} form is used as the output nonterminal, \scheme{quasiquote}
will not be rebound, and no output-language templates are available.
When both the input and output \var{nt-specifier} are \scheme{*}, the
transformer is effectively an ordinary Scheme procedure.

The \var{fml-expr} has one of the following two forms:

{\small
\schemedisplay
\var{fml}
[\var{fml} \var{default-val-expr}]
\endschemedisplay
}

\noindent
where \var{fml} is a Scheme identifier and \var{default-val-expr} is a Scheme
expression.
The \var{default-val-expr} is used when an argument is not specified in a
catamorphism or when a matching \scheme{fml} is not available in the calling
transformer.
All arguments must be explicitly provided when the transformer is called as an
ordinary Scheme procedure.
Using the catamorphism syntax, the arguments can be explicitly supplied, using
the syntax discussed on page~\pageref{cata:syntax}.
It can also be specified implicitly.
Arguments are filled in implicitly in catamorphisms that do not explicitly
provide the arguments and in autogenerated clauses when the nonterminal
elements of a production are processed.
These implicitly supplied formals are handled by looking for a formal in the
calling transformer that has the same name as the formal expected by the target
transformer.
If no matching formal is found, and the target transformer specifies a default
value, the default value will be used in the call; otherwise, another target
transformer must be found, a new transformer must be autogenerated, or an
exception must be raised to indicate that no transformer was found and none can
be autogenerated.

The \var{extra-return-val-expr} can be any Scheme expression.
These expressions are scoped within the \var{fml}s bound by the transformer.
This allows an input formal to be returned as an extra return value, implicitly
in the autogenerated clauses.
This can be useful for threading values through a transformer.

The optional \var{definitions-clause} can include any Scheme expression that
can be placed in a definition context.
These definitions are scoped within the transformer.
When an output nonterminal is specified, the \scheme{quasiquote} is also bound
within the body of the \scheme{definitions} clause to allow language term
templates to be included in the body of the definitions.

When the input \var{nt-specifier} is not \scheme{*}, the
\var{transformer-body} has one of the following forms:

{\small
\schemedisplay
[\var{pattern} \var{guard-clause} \var{body*} ... \var{body}]
[\var{pattern} \var{body*} ... \var{body}]
[else \var{body*} ... \var{body}]
\endschemedisplay
}

\noindent
where the \scheme{else} clause must be the last one listed in a transformer and
prevents autogeneration of missing clauses (because the \scheme{else} clause is
used in place of the autogenerated clauses).
The \var{pattern} is an S-expression pattern, based on the S-expression
productions used in the language definition.
Patterns can be arbitrarily nested.
Variables bound by the pattern are preceded by an \scheme{unquote} and are
named based on the meta-variables named in the language definition.
The variable name can be used to restrict the pattern by using a meta-variable
that is more specific than the one specified in the language definition.
The \var{pattern} can also contain catamorphisms that have one of the
following forms:

{\small
\label{cata:syntax}
\schemedisplay
[\var{Proc-expr} : \var{input-fml} \var{arg} ... -> \var{output-fml} \var{extra-rv-fml}  ...]
[\var{Transformer-name} : \var{output-fml} \var{extra-rv-fml} ...]
[\var{input-fml} \var{arg} ... -> \var{output-fml} \var{extra-rv-fml} ...]
[\var{output-fml} \var{extra-rv-fml} ...]
\endschemedisplay
}

\noindent
In the first form, the \var{Proc-expr} is an explicitly specified procedure
expression, the \var{input-fml} and all arguments to the procedure are explicitly specified, and the results of calling the \var{Proc-expr} are bound by the \var{output-fml} and \var{extra-rv-fml}s.
Note that the \var{Proc-expr} may be a \var{Transformer-name}.
In the second form, the \var{Transformer-name} is an identifier that refers to
a transformer named in this pass.
The \scheme{define-pass} macro determines, based on the signature of the
transformer referred to by the \var{Transformer-name}, what arguments should be
supplied to the transformer.
In the last two forms, the transformer is determined automatically.
In the third form, the nonterminal type associated with the \var{input-fml},
the \var{arg}s, the output nonterminal type based on the \var{output-fml}, and
the \var{extra-rv-fml}s are used to determine the transformer to call.
In the final form, the nonterminal type for the field within the production,
along with the formals to the calling transformer, the output nonterminal type
based on the \var{output-fml}, and the \var{extra-rv-fml}s are used to
determine the transformer to call.
In the two forms where the transformer is not explicitly named, a new
transformer can be autogenerated when no \var{arg}s and no \var{extra-rv-fml}s
are specified. 
This limitation is in place to avoid creating a transformer with extra formals
whose use is unspecified and extra return values with potentially dubious
return-value expressions.

The \var{input-fml} is a Scheme identifier with a name based on the
meta-variables named in the input-language definition.
The specification of a more restrictive meta-variable name can be used to further
restrict the pattern.
The \var{output-fml} is a Scheme identifier with a name based on the
meta-variables named in the output-language definition.
The \var{extra-rv-fml} is a Scheme identifier.
The \var{input-fml}s named in the fields of a pattern must be unique.
The \var{output-fml}s and \var{extra-rv-fml}s must also be unique, although they
can overlap with the \var{input-fml}s that are shadowed in the body by
the \var{output-fml} or \var{extra-rv-fml} with the same name.

Only the \var{input-fml}s are visible within the optional \var{guard-clause}.
This is because the \var{guard-clause} is evaluated before the catamorphisms
recur on the fields of a production.
The \var{guard-clause} has the following form:

{\small
\schemedisplay
(guard \var{guard-expr} ...)
\endschemedisplay
}

\noindent
where \var{guard-expr} is a Scheme expression.
The \var{guard-clause} has the same semantics as \scheme{and}.

The \var{body*} and \var{body} are any Scheme expression.
When the output \var{nt-specifier} is not \scheme{*},
\scheme{quasiquote} is rebound to a macro that interprets \scheme{quasiquote}
expressions as templates for productions in the output nonterminal.
Additionally, \scheme{in-context} is a macro that can be used to rebind
\scheme{quasiquote} to a different nonterminal.
Templates are specified as S-expressions based on the productions specified by
the output language.
In templates, \scheme{unquote} is used to indicate that the expression in the
\scheme{unquote} should be used to fill in the given field of the production.
Within an \scheme{unquote} expression, \scheme{quasiquote} is rebound to the
appropriate nonterminal based on the expected type of the field in the
production.
If the template includes items that are not \scheme{unquote}d where a field
value is expected, the expression found there is automatically quoted.
This allows self-evaluating items such as symbols, booleans, and numbers to be
more easily specified in templates.
A list of items can be specified in a field that expects a list, using an
ellipsis.
%More than one ellipsis can be specified to flatten out a list of lists.

Although the syntax of a language production is specified as an S-expression,
the record representation used for the language term separates each variable
specified into a separate field.
This means that the template syntax expects a separate value or list of values for
each field in the record.
For instance, in the \scheme{(letrec ([x* e*] ...) body)} production,
a template of the form
\scheme{(letrec (,bindings ...) ,body)} cannot be used
because the nanopass framework will not attempt to break up the
\scheme{bindings} list into its \scheme{x*} and \scheme{e*} component parts.
The template
\scheme{(letrec ([,(map car bindings) ,(map cadr bindings)] ...) ,body)}
accomplishes the same goal, explicitly separating the variables from the expressions.
It is possible that the nanopass framework could be extended to perform the task of
splitting up the \scheme{binding*} list automatically, but it is not done
currently, partially to avoid hiding the cost of deconstructing the
\scheme{binding*} list and constructing the \scheme{x*} and \scheme{e*} lists.

The \scheme{in-context} expression within the body has the following form:

{\small
\schemedisplay
(in-context \var{nonterminal-name} \var{body*} ... \var{body})
\endschemedisplay
}

The \scheme{in-context} form rebinds the \scheme{quasiquote} to  allow
productions from the named nonterminal to be constructed in a context where
they are not otherwise expected.

\chapter{Working with language forms}

\section{Constructing language forms outside of a pass}

In addition to creating language forms using a parser defined with
\scheme{define-parser} or through a pass defined with \scheme{define-pass}, 
language forms can also be created using the
\scheme{with-output-language} form.
The \scheme{with-output-language} form binds the \scheme{in-context}
transformer for the language specified and, if a nonterminal is also specified,
binds the \scheme{quasiquote} form.
This allows the same template syntax used in the body of a transformer to be
used outside of the context of a pass.
In a commercial compiler, such as Chez Scheme, it is often convenient to use
functional abstraction to centralize the creation of a language term.

For instance, in the \scheme{convert-assignments} pass, the
\scheme{with-output-languge} form is wrapped around the \scheme{make-boxes} and
\scheme{build-let} procedures.
This is done so that primitive calls to \scheme{box} along with the \scheme{let} form of the \scheme{L10} language can be constructed with quasiquoted expressions.

{\small
\schemedisplay
(with-output-language (L10 Expr)
  (define make-boxes
    (lambda (t*)
      (map (lambda (t) `(primcall box ,t)) t*)))
  (define build-let
    (lambda (x* e* body)
      (if (null? x*)
          body
          `(let ([,x* ,e*] ...) ,body)))))
\endschemedisplay
}

\noindent
This rebinds both the \scheme{quasiquote} keyword and the \scheme{in-context} keyword.

The \scheme{with-output-language} form has one of the following forms:

{\small
\schemedisplay
(with-output-language \var{lang-name} \var{expr*} ... \var{expr})
(with-output-language (\var{lang-name} \var{nonterminal-name}) \var{expr*} ... \var{expr})
\endschemedisplay
}

\noindent
In the first form, the \scheme{in-context} form is bound and can be used to
specify a \var{nonterminal-name}, as described at the end of
Section~\ref{sec:define-pass}.
In the second form, both \scheme{in-context} and \scheme{quasiquote} are bound.
The \scheme{quasiquote} form is bound in the context of the specified
\var{nonterminal-name}, and templates can be defined just as they are on the
right-hand side of a transformer clause.

The \scheme{with-output-language} form is a splicing form, similar to \scheme{begin}
or \scheme{let-syntax}, allowing multiple definitions or expressions
that are all at the same scoping level as the
\scheme{with-output-language} form to be contained within the form.
This is convenient when writing a set of definitions that all construct some
piece of a language term from the same nonterminal.
This flexibility means that the \scheme{with-output-language} form cannot be
defined as syntactic sugar for the \scheme{define-pass} form.

\section{Matching language forms outside of a pass}

In addition to the \scheme{define-pass} form, it is possible to match a
language term using the \scheme{nanopass-case} form.
This can be useful when creating functional abstractions, such as predicates that
ask a question based on matching a language form.
For instance, suppose we write a \scheme{lambda?} predicate for the
\scheme{L8} language as follows:

{\small
\schemedisplay
(define lambda?
  (lambda (e)
    (nanopass-case (L8 Expr) e
      [(lambda (,x* ...) ,abody) #t]
      [else #f])))
\endschemedisplay
}

\noindent
The \scheme{nanopass-case} form has the following syntax:

{\small
\schemedisplay
(nanopass-case (\var{lang-name} \var{nonterminal-name}) \var{expr}
  \var{matching-clause} ...)
\endschemedisplay
}

\noindent
where \var{matching-clause} has one of the following forms:

{\small
\schemedisplay
[\var{pattern} \var{guard-clause} \var{expr*} ... \var{expr}]
[\var{pattern} \var{expr*} ... \var{expr}]
[else \var{expr*} ... \var{expr}]
\endschemedisplay
}

\noindent
where the \var{pattern} and \var{guard-clause} forms have the same syntax as in
the \var{transformer-body} of a pass.

Similar to \scheme{with-output-language}, \scheme{nanopass-case} provides a
more succinct syntax for matching a language form than does the general
\scheme{define-pass} form.
Unlike the \scheme{with-output-language} form, however, the
\scheme{nanopass-case} form can be implemented in terms of the
\scheme{define-pass} form.
For example, the \scheme{lambda?} predicate also could have been written as:

{\small
\schemedisplay
(define-pass lambda? : (L8 Expr) (e) -> * (bool)
  (Expr : Expr (e) -> * (bool)
    [(lambda (,x* ...) ,abody) #t]
    [else #f])
  (Expr e))
\endschemedisplay
}

\noindent
This is, in fact, how the \scheme{nanopass-case} macro is implemented.

\chapter{Working with languages}

\section{Displaying languages}

The \scheme{language->s-expression} form can be used to print the full definition of a language by supplying it the language
name to be printed.
This can be helpful when working with extended languages, such as in the case of
\scheme{L1}:

{\small
\schemedisplay
(language->s-expression L1)
\endschemedisplay
}

\noindent
which returns:

{\small
\schemedisplay
(define-language L1
  (entry Expr)
  (terminals
    (void+primitive (pr))
    (symbol (x))
    (constant (c))
    (datum (d)))
  (Expr (e body)
    pr
    x
    c
    (quote d)
    (if e0 e1 e2)
    (or e* ...)
    (and e* ...)
    (not e)
    (begin e* ... e)
    (lambda (x* ...) body* ... body)
    (let ([x* e*] ...) body* ... body)
    (letrec ([x* e*] ...) body* ... body)
    (set! x e)
    (e e* ...)))
\endschemedisplay
}

\section{Differencing languages}

The extension form can also be derived between any two languages by using the
\scheme{diff-languages} form.
For instance, we can get the differences between the \scheme{Lsrc} and
\scheme{L1} language (giving us back the extension) with:

{\small
\schemedisplay
(diff-languages Lsrc L1)
\endschemedisplay
}

\noindent
which returns:

{\small
\schemedisplay
(define-language L1
  (extends Lsrc)
  (entry Expr)
  (terminals
    (- (primitive (pr)))
    (+ (void+primitive (pr))))
  (Expr (e body)
    (- (if e0 e1))))
\endschemedisplay
}

\section{Viewing the expansion of passes and transformers}

The \scheme{define-pass} form autogenerates both transformers and clauses
within transformers.
In simple passes, these are generally straightforward to reason about, but in
more complex passes, particularly those that make use of different arguments
for different transformers or include extra return values, it can become more
difficult to determine what code will be generated.
In particular, the experience of developing a full commercial compiler has
shown that the \scheme{define-pass} form can autogenerate transformers that
shadow those defined by the compiler writer.
To help the compiler writer determine what code is being generated,
there is a variation of the \scheme{define-pass} form, called
\scheme{echo-define-pass}, that will echo the expansion of \scheme{define-pass}.

For instance, we can echo the \scheme{remove-one-armed-if} pass to get the
following:

{\small
\schemedisplay
(echo-define-pass remove-one-armed-if : Lsrc (e) -> L1 ()
  (Expr : Expr (e) -> Expr ()
    [(if ,[e0] ,[e1]) `(if ,e0 ,e1 (void))]))

;=>

pass remove-one-armed-if expanded into:
(define remove-one-armed-if
  (lambda (e)
    (define who 'remove-one-armed-if)
    (define-nanopass-record)
    (define Expr
      (lambda (e)
        (let ([g221.159 e])
          (let-syntax ([quasiquote '#<procedure tmp>]
                       [in-context '#<procedure>])
            (begin
              (let ([rhs.160 (lambda (e0 e1) `(if ,e0 ,e1 (void)))])
                (cond
                  [(primitive? g221.159) g221.159]
                  [(symbol? g221.159) g221.159]
                  [(constant? g221.159) g221.159]
                  [else
                   (let ([tag (nanopass-record-tag g221.159)])
                     (cond
                       [(eqv? tag 4)
                        (let* ([g222.161 (Lsrc:if:Expr.387-e0 g221.159)]
                               [g223.162 (Lsrc:if:Expr.387-e1 g221.159)])
                          (let-values ([(e0) (Expr g222.161)]
                                       [(e1) (Expr g223.162)])
                            (rhs.160 e0 e1)))]
                       [(eqv? tag 2)
                        (make-L1:quote:Expr.400
                          'remove-one-armed-if
                          (Lsrc:quote:Expr.386-d g221.159)
                          "d")]
                       [(eqv? tag 6)
                        (make-L1:if:Expr.401 'remove-one-armed-if
                          (Expr (Lsrc:if:Expr.388-e0 g221.159))
                          (Expr (Lsrc:if:Expr.388-e1 g221.159))
                          (Expr (Lsrc:if:Expr.388-e2 g221.159)) "e0" "e1"
                          "e2")]
                       [(eqv? tag 8)
                        (make-L1:or:Expr.402
                          'remove-one-armed-if
                          (map (lambda (m) (Expr m))
                               (Lsrc:or:Expr.389-e* g221.159))
                          "e*")]
                       [(eqv? tag 10)
                        (make-L1:and:Expr.403
                          'remove-one-armed-if
                          (map (lambda (m) (Expr m))
                               (Lsrc:and:Expr.390-e* g221.159))
                          "e*")]
                       [(eqv? tag 12)
                        (make-L1:not:Expr.404
                          'remove-one-armed-if
                          (Expr (Lsrc:not:Expr.391-e g221.159))
                          "e")]
                       [(eqv? tag 14)
                        (make-L1:begin:Expr.405 'remove-one-armed-if
                          (map (lambda (m) (Expr m))
                               (Lsrc:begin:Expr.392-e* g221.159))
                          (Expr (Lsrc:begin:Expr.392-e g221.159)) "e*"
                          "e")]
                       [(eqv? tag 16)
                        (make-L1:lambda:Expr.406 'remove-one-armed-if
                          (Lsrc:lambda:Expr.393-x* g221.159)
                          (map (lambda (m) (Expr m))
                               (Lsrc:lambda:Expr.393-body* g221.159))
                          (Expr (Lsrc:lambda:Expr.393-body g221.159)) "x*"
                          "body*" "body")]
                       [(eqv? tag 18)
                        (make-L1:let:Expr.407 'remove-one-armed-if
                          (Lsrc:let:Expr.394-x* g221.159)
                          (map (lambda (m) (Expr m))
                               (Lsrc:let:Expr.394-e* g221.159))
                          (map (lambda (m) (Expr m))
                               (Lsrc:let:Expr.394-body* g221.159))
                          (Expr (Lsrc:let:Expr.394-body g221.159)) "x*"
                          "e*" "body*" "body")]
                       [(eqv? tag 20)
                        (make-L1:letrec:Expr.408 'remove-one-armed-if
                          (Lsrc:letrec:Expr.395-x* g221.159)
                          (map (lambda (m) (Expr m))
                               (Lsrc:letrec:Expr.395-e* g221.159))
                          (map (lambda (m) (Expr m))
                               (Lsrc:letrec:Expr.395-body* g221.159))
                          (Expr (Lsrc:letrec:Expr.395-body g221.159)) "x*"
                          "e*" "body*" "body")]
                       [(eqv? tag 22)
                        (make-L1:set!:Expr.409 'remove-one-armed-if
                          (Lsrc:set!:Expr.396-x g221.159)
                          (Expr (Lsrc:set!:Expr.396-e g221.159)) "x" "e")]
                       [(eqv? tag 24)
                        (make-L1:e:Expr.410 'remove-one-armed-if
                          (Expr (Lsrc:e:Expr.397-e g221.159))
                          (map (lambda (m) (Expr m))
                               (Lsrc:e:Expr.397-e* g221.159))
                          "e" "e*")]
                       [else
                        (error 'remove-one-armed-if
                          "unexpected Expr"
                          g221.159)]))])))))))
    (let ([x (Expr e)])
      (unless ((lambda (x)
                 (or (L1:Expr.399? x)
                     (constant? x)
                     (symbol? x)
                     (void+primitive? x)))
                x)
        (error 'remove-one-armed-if
          (format "expected ~s but got ~s" 'Expr x)))
      x)))
\endschemedisplay
}

\noindent
This exposes the code generated by \scheme{define-pass} but does not expand
the language form construction templates. 
The autogenerated clauses, such as the one that handles \scheme{set!}, have a form like the following: 

{\small
\schemedisplay
[(eqv? tag 7)
 (make-L1:set!:Expr.18
   (Lsrc:set!:Expr.8-x g0.14)
   (Expr (Lsrc:set!:Expr.8-e g0.14)))]
\endschemedisplay
}

\noindent
Here, the tag of the record is checked and a new output-language record constructed,
after recurring to the \scheme{Expr} transformer on the \scheme{e} field.

The body code also changes slightly, so that the output of the pass can be
checked to make sure that it is a valid \scheme{L1} \scheme{Expr}.

In addition to echoing the output of the entire pass, it is also possible to
echo just the expansion of a single transformer by prefixing the transformer
with the \scheme{echo} keyword.

{\small
\schemedisplay
(define-pass remove-one-armed-if : Lsrc (e) -> L1 ()
  (echo Expr : Expr (e) -> Expr ()
    [(if ,[e0] ,[e1]) `(if ,e0 ,e1 (void))]))

;=>

Expr in pass remove-one-armed-if expanded into:
(define Expr
  (lambda (e)
    (let ([g442.303 e])
      (let-syntax ([quasiquote '#<procedure tmp>]
                   [in-context '#<procedure>])
        (begin
          (let ([rhs.304 (lambda (e0 e1) `(if ,e0 ,e1 (void)))])
            (cond
              [(primitive? g442.303) g442.303]
              [(symbol? g442.303) g442.303]
              [(constant? g442.303) g442.303]
              [else
               (let ([tag (nanopass-record-tag g442.303)])
                 (cond
                   [(eqv? tag 4)
                    (let* ([g443.305 (Lsrc:if:Expr.770-e0 g442.303)]
                           [g444.306 (Lsrc:if:Expr.770-e1 g442.303)])
                      (let-values ([(e0) (Expr g443.305)]
                                   [(e1) (Expr g444.306)])
                        (rhs.304 e0 e1)))]
                   [(eqv? tag 2)
                    (make-L1:quote:Expr.783
                      'remove-one-armed-if
                      (Lsrc:quote:Expr.769-d g442.303)
                      "d")]
                   [(eqv? tag 6)
                    (make-L1:if:Expr.784 'remove-one-armed-if
                      (Expr (Lsrc:if:Expr.771-e0 g442.303))
                      (Expr (Lsrc:if:Expr.771-e1 g442.303))
                      (Expr (Lsrc:if:Expr.771-e2 g442.303)) "e0" "e1"
                      "e2")]
                   [(eqv? tag 8)
                    (make-L1:or:Expr.785
                      'remove-one-armed-if
                      (map (lambda (m) (Expr m))
                           (Lsrc:or:Expr.772-e* g442.303))
                      "e*")]
                   [(eqv? tag 10)
                    (make-L1:and:Expr.786
                      'remove-one-armed-if
                      (map (lambda (m) (Expr m))
                           (Lsrc:and:Expr.773-e* g442.303))
                      "e*")]
                   [(eqv? tag 12)
                    (make-L1:not:Expr.787
                      'remove-one-armed-if
                      (Expr (Lsrc:not:Expr.774-e g442.303))
                      "e")]
                   [(eqv? tag 14)
                    (make-L1:begin:Expr.788 'remove-one-armed-if
                      (map (lambda (m) (Expr m))
                           (Lsrc:begin:Expr.775-e* g442.303))
                      (Expr (Lsrc:begin:Expr.775-e g442.303)) "e*" "e")]
                   [(eqv? tag 16)
                    (make-L1:lambda:Expr.789 'remove-one-armed-if
                      (Lsrc:lambda:Expr.776-x* g442.303)
                      (map (lambda (m) (Expr m))
                           (Lsrc:lambda:Expr.776-body* g442.303))
                      (Expr (Lsrc:lambda:Expr.776-body g442.303)) "x*"
                      "body*" "body")]
                   [(eqv? tag 18)
                    (make-L1:let:Expr.790 'remove-one-armed-if (Lsrc:let:Expr.777-x* g442.303)
                      (map (lambda (m) (Expr m))
                           (Lsrc:let:Expr.777-e* g442.303))
                      (map (lambda (m) (Expr m))
                           (Lsrc:let:Expr.777-body* g442.303))
                      (Expr (Lsrc:let:Expr.777-body g442.303)) "x*" "e*"
                      "body*" "body")]
                   [(eqv? tag 20)
                    (make-L1:letrec:Expr.791 'remove-one-armed-if
                      (Lsrc:letrec:Expr.778-x* g442.303)
                      (map (lambda (m) (Expr m))
                           (Lsrc:letrec:Expr.778-e* g442.303))
                      (map (lambda (m) (Expr m))
                           (Lsrc:letrec:Expr.778-body* g442.303))
                      (Expr (Lsrc:letrec:Expr.778-body g442.303)) "x*" "e*"
                      "body*" "body")]
                   [(eqv? tag 22)
                    (make-L1:set!:Expr.792 'remove-one-armed-if (Lsrc:set!:Expr.779-x g442.303)
                      (Expr (Lsrc:set!:Expr.779-e g442.303)) "x" "e")]
                   [(eqv? tag 24)
                    (make-L1:e:Expr.793 'remove-one-armed-if
                      (Expr (Lsrc:e:Expr.780-e g442.303))
                      (map (lambda (m) (Expr m))
                           (Lsrc:e:Expr.780-e* g442.303))
                      "e" "e*")]
                   [else
                    (error 'remove-one-armed-if
                      "unexpected Expr"
                      g442.303)]))])))))))
\endschemedisplay
}

\section{Tracing passes and transformers}

Echoing the code generated by \scheme{define-pass} can help compiler writers
to understand what is happening at expansion time, but it does not help in determining
what is happening at run time.
To facilitate this type of debugging, passes and transformers can be
traced at run time.
The tracing system, similar to Chez Scheme's \scheme{trace-define-syntax},
unparses the input-language term and output-language term of the pass using the language unparsers to
provide the S-expression representation of the language term that is being transformed.

The \scheme{trace-define-pass} form works just like the \scheme{define-pass}
form but adds tracing for the input-language term and output-language term of the pass.
For instance, if we want to trace the processing of the input:

{\small
\schemedisplay
(let ([x 10])
  (if (= (* (/ x 2) 2) x) (set! x (/ x 2)))
  (* x 3))
\endschemedisplay
}

\noindent
the pass can be defined as a tracing pass, as follows:

{\small
\schemedisplay
(trace-define-pass remove-one-armed-if : Lsrc (e) -> L1 ()
  (Expr : Expr (e) -> Expr ()
    [(if ,[e0] ,[e1]) `(if ,e0 ,e1 (void))]))
\endschemedisplay
}

\noindent
Running the class compiler with \scheme{remove-one-armed-if} traced produces the following:

{\small
\schemedisplay
> (my-tiny-compile 
    '(let ([x 10])
       (if (= (* (/ x 2) 2) x) (set! x (/ x 2)))
       (* x 3)))
|(remove-one-armed-if
   (let ([x.12 10])
     (if (= (* (/ x.12 2) 2) x.12) (set! x.12 (/ x.12 2)))
     (* x.12 3)))
|(let ([x.12 10])
   (if (= (* (/ x.12 2) 2) x.12) (set! x.12 (/ x.12 2)) (void))
   (* x.12 3))
15
\endschemedisplay
}

\noindent
The tracer prints the \emph{pretty} (i.e., S-expression) form of the language,
rather than the record representation, to allow the compiler writer to read the
terms more easily.
This does not trace the internal transformations that happen within the
transformers of the pass.
Transformers can be traced by adding the \scheme{trace} keyword in front of the
transformer definition.
We can run the same test with a \scheme{remove-one-armed-if} that traces the
\scheme{Expr} transformer, as follows:

{\small
\schemedisplay
> (my-tiny-compile 
      '(let ([x 10])
         (if (= (* (/ x 2) 2) x) (set! x (/ x 2)))
         (* x 3)))
|(Expr
   (let ([x.0 10]) (if (= (* (/ x.0 2) 2) x.0) (set! x.0 (/ x.0 2))) (* x.0 3)))
| (Expr (* x.0 3))
| |(Expr x.0)
| |x.0
| |(Expr 3)
| |3
| |(Expr *)
| |*
| (* x.0 3)
| (Expr (if (= (* (/ x.0 2) 2) x.0) (set! x.0 (/ x.0 2))))
| |(Expr (= (* (/ x.0 2) 2) x.0))
| | (Expr (* (/ x.0 2) 2))
| | |(Expr (/ x.0 2))
| | | (Expr x.0)
| | | x.0
| | | (Expr 2)
| | | 2
| | | (Expr /)
| | | /
| | |(/ x.0 2)
| | |(Expr 2)
| | |2
| | |(Expr *)
| | |*
| | (* (/ x.0 2) 2)
| | (Expr x.0)
| | x.0
| | (Expr =)
| | =
| |(= (* (/ x.0 2) 2) x.0)
| |(Expr (set! x.0 (/ x.0 2)))
| | (Expr (/ x.0 2))
| | |(Expr x.0)
| | |x.0
| | |(Expr 2)
| | |2
| | |(Expr /)
| | |/
| | (/ x.0 2)
| |(set! x.0 (/ x.0 2))
| (if (= (* (/ x.0 2) 2) x.0) (set! x.0 (/ x.0 2)) (void))
| (Expr 10)
| 10
|(let ([x.0 10])
   (if (= (* (/ x.0 2) 2) x.0) (set! x.0 (/ x.0 2)) (void))
   (* x.0 3))
15
\endschemedisplay
}

\noindent
Here, too, the traced forms are the pretty representation and not
the record representation.

\bibliographystyle{abbrv}
\bibliography{user-guide}

\end{document}