File: user_manual.html

package info (click to toggle)
gridtools 2.3.9-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 29,480 kB
  • sloc: cpp: 228,792; python: 17,561; javascript: 9,164; ansic: 4,101; sh: 850; makefile: 231; f90: 201
file content (2070 lines) | stat: -rw-r--r-- 311,781 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>User Manual &mdash; GridTools 2.3.0 documentation</title>
      <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
      <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
      <link rel="stylesheet" href="../_static/css/cscs.css" type="text/css" />
  <!--[if lt IE 9]>
    <script src="../_static/js/html5shiv.min.js"></script>
  <![endif]-->
  
        <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
        <script src="../_static/jquery.js"></script>
        <script src="../_static/underscore.js"></script>
        <script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
        <script src="../_static/doctools.js"></script>
        <script async="async" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
    <script src="../_static/js/theme.js"></script>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="Glossary" href="../glossary/glossary.html" />
    <link rel="prev" title="Getting Started" href="../getting_started/getting_started.html" /> 
</head>

<body class="wy-body-for-nav"> 
  <div class="wy-grid-for-nav">
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
            <a href="../index.html" class="icon icon-home"> GridTools
            <img src="../_static/logo.svg" class="logo" alt="Logo"/>
          </a>
              <div class="version">
                2.3
              </div>
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>
        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
              <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../introduction/introduction.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started.html">Getting Started</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">User Manual</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#storage-library">Storage Library</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#data-store">Data Store</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#data-store-synopsis">Data Store Synopsis</a></li>
<li class="toctree-l4"><a class="reference internal" href="#data-view-synopsis">Data View Synopsis</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#builder-api">Builder API</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#builder-synopsis">Builder Synopsis</a></li>
<li class="toctree-l4"><a class="reference internal" href="#constrains-on-builder-setters">Constrains on Builder Setters</a></li>
<li class="toctree-l4"><a class="reference internal" href="#notes-on-builder-setters-semantics">Notes on Builder Setters Semantics</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#traits">Traits</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#defining-custom-traits">Defining Custom Traits</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#sid-concept-adaptation">SID Concept Adaptation</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#stencil-operators">Stencil Operators</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#accessor-type">Accessor Type</a></li>
<li class="toctree-l3"><a class="reference internal" href="#parameter-list">Parameter list</a></li>
<li class="toctree-l3"><a class="reference internal" href="#apply-method"><cite>Apply</cite>-Method</a></li>
<li class="toctree-l3"><a class="reference internal" href="#example">Example</a></li>
<li class="toctree-l3"><a class="reference internal" href="#expressions">Expressions</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#execution-model">Execution Model</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#extended-compute-domain-of-a-stage">Extended Compute Domain of a Stage</a></li>
<li class="toctree-l3"><a class="reference internal" href="#access-restrictions">Access restrictions</a></li>
<li class="toctree-l3"><a class="reference internal" href="#examples-for-access-restrictions">Examples for Access Restrictions</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#stencil-composition">Stencil Composition</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#defining-the-iteration-space-the-grid">Defining the Iteration Space: the Grid</a></li>
<li class="toctree-l3"><a class="reference internal" href="#vertical-regions-and-vertical-boundary-conditions">Vertical Regions and Vertical Boundary Conditions</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#default-interval">Default Interval</a></li>
<li class="toctree-l4"><a class="reference internal" href="#defining-vertical-intervals">Defining Vertical Intervals</a></li>
<li class="toctree-l4"><a class="reference internal" href="#advanced-functionality-for-vertical-intervals">Advanced Functionality for Vertical Intervals</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#run-computation-driver"><strong>run</strong>: Computation Driver</a></li>
<li class="toctree-l3"><a class="reference internal" href="#stencil-composition-specification">Stencil Composition Specification</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#the-notion-of-a-stage">The Notion of a Stage</a></li>
<li class="toctree-l4"><a class="reference internal" href="#multi-pass-computations">Multi-Pass Computations</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#selecting-the-backend">Selecting the Backend</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#advanced-functionality">Advanced Functionality</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#stencil-functions">Stencil Functions</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#function-calls-call">Function Calls: <cite>call&lt;&gt;</cite></a></li>
<li class="toctree-l4"><a class="reference internal" href="#procedure-calls-call-proc">Procedure Calls: <cite>call_proc&lt;&gt;</cite></a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#software-managed-caches">Software-Managed Caches</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#cache-type">Cache Type</a></li>
<li class="toctree-l4"><a class="reference internal" href="#cache-policy">Cache Policy</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#expandable-parameters">Expandable Parameters</a></li>
<li class="toctree-l3"><a class="reference internal" href="#global-parameters">Global Parameters</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#boundary-conditions">Boundary Conditions</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#introduction">Introduction</a></li>
<li class="toctree-l3"><a class="reference internal" href="#preliminaries">Preliminaries</a></li>
<li class="toctree-l3"><a class="reference internal" href="#boundary-condition-class">Boundary Condition Class</a></li>
<li class="toctree-l3"><a class="reference internal" href="#boundary-condition-application">Boundary Condition Application</a></li>
<li class="toctree-l3"><a class="reference internal" href="#boundary-predication">Boundary Predication</a></li>
<li class="toctree-l3"><a class="reference internal" href="#provided-boundary-conditions">Provided Boundary Conditions</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#halo-exchanges">Halo Exchanges</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#id14">Introduction</a></li>
<li class="toctree-l3"><a class="reference internal" href="#id15">Preliminaries</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#processor-grid">Processor Grid</a></li>
<li class="toctree-l4"><a class="reference internal" href="#layout-map">Layout Map</a></li>
<li class="toctree-l4"><a class="reference internal" href="#halo-descriptor">Halo Descriptor</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#gcl-communication-module">GCL Communication Module</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#distributed-boundary-conditions">Distributed Boundary Conditions</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#design-principles">Design Principles:</a></li>
<li class="toctree-l3"><a class="reference internal" href="#communication-traits">Communication Traits</a></li>
<li class="toctree-l3"><a class="reference internal" href="#binding-boundaries-and-communication">Binding Boundaries and Communication</a></li>
<li class="toctree-l3"><a class="reference internal" href="#distributed-boundaries">Distributed Boundaries</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#interfacing-to-other-programming-languages">Interfacing to other programming languages</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#exporting-functions-with-no-array-type-arguments">Exporting functions with no array-type arguments</a></li>
<li class="toctree-l3"><a class="reference internal" href="#complex-types">Complex types</a></li>
<li class="toctree-l3"><a class="reference internal" href="#exporting-functions-with-array-type-arguments-to-fortran">Exporting functions with array-type arguments to Fortran</a></li>
<li class="toctree-l3"><a class="reference internal" href="#cmake-usage">CMake usage</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../glossary/glossary.html">Glossary</a></li>
<li class="toctree-l1"><a class="reference internal" href="../internal/internal.html">Internal Documentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../faq/faq.html">Frequently Asked Questions</a></li>
</ul>

        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">GridTools</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="Page navigation">
  <ul class="wy-breadcrumbs">
      <li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
      <li>User Manual</li>
      <li class="wy-breadcrumbs-aside">
            <a href="../_sources/user_manual/user_manual.rst.txt" rel="nofollow"> View page source</a>
      </li>
  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
             
  <section id="user-manual">
<span id="id1"></span><h1>User Manual<a class="headerlink" href="#user-manual" title="Permalink to this heading"></a></h1>
<section id="storage-library">
<span id="storage-module"></span><h2>Storage Library<a class="headerlink" href="#storage-library" title="Permalink to this heading"></a></h2>
<p>The Storage Library provides the way to represent multidimensional typed contiguous memory allocations with arbitrary
layout and alignment. All entities are defined in the <code class="docutils literal notranslate"><span class="pre">gridtools::storage</span></code> namespace.</p>
<section id="data-store">
<h3>Data Store<a class="headerlink" href="#data-store" title="Permalink to this heading"></a></h3>
<p>The key entity of the library, representing a multidimensional array.</p>
<blockquote>
<div><ul class="simple">
<li><p>Data store has no user facing constructors. To create it one should use the <a class="reference internal" href="#builder-api"><span class="std std-ref">Builder API</span></a>.</p></li>
<li><p>The access to the actual data is indirect. Data store has methods to request a view. The view provides data access
via overloaded call operator.</p></li>
<li><p>Data store is aware of memory spaces. It distinguish between <code class="docutils literal notranslate"><span class="pre">target</span></code> and <code class="docutils literal notranslate"><span class="pre">host</span></code> data access. Views are
requested with <code class="docutils literal notranslate"><span class="pre">target_view()</span></code>/<code class="docutils literal notranslate"><span class="pre">target_const_view()</span></code>/<code class="docutils literal notranslate"><span class="pre">host_view()</span></code>/<code class="docutils literal notranslate"><span class="pre">host_const_view()</span></code> methods. If <code class="docutils literal notranslate"><span class="pre">target</span></code> and
<code class="docutils literal notranslate"><span class="pre">host</span></code> spaces are different and the data store holds non constant data, data store performs automatic memory
synchronization if needed. It is assumed that the target memory space access is used for doing computations
and host access is used for filling, dumping and verifying the data.</p></li>
</ul>
</div></blockquote>
<section id="data-store-synopsis">
<span id="id2"></span><h4>Data Store Synopsis<a class="headerlink" href="#data-store-synopsis" title="Permalink to this heading"></a></h4>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="o">&lt;</span><span class="cm">/* Implementation defined parameters */</span><span class="o">&gt;</span><span class="w"></span>
<span class="k">class</span><span class="w"> </span><span class="nc">data_store</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">  </span><span class="k">public</span><span class="o">:</span><span class="w"></span>
<span class="w">    </span><span class="k">static</span><span class="w"> </span><span class="k">constexpr</span><span class="w"> </span><span class="kt">size_t</span><span class="w"> </span><span class="n">ndims</span><span class="p">;</span><span class="w"> </span><span class="cm">/* Dimensionality */</span><span class="w"></span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">layout_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="cm">/* Instantiation of gridtools::layout_map. */</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">data_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="cm">/* Type of the element. */</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="c1">// The following invariant is held: for any data_store instancies that have</span>
<span class="w">    </span><span class="c1">// the same kind_t the strides are also the same.</span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">kind_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="cm">/* A type that identifies the strides set. */</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="c1">// Data store arbitrary label. Mainly for debugging.</span>
<span class="w">    </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="n">name</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="c1">// The sizes of the data store in each dimension.</span>
<span class="w">    </span><span class="n">array</span><span class="o">&lt;</span><span class="kt">unsigned</span><span class="p">,</span><span class="w"> </span><span class="n">ndims</span><span class="o">&gt;</span><span class="w"> </span><span class="n">lengths</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="c1">// The strides of the data store in each dimension.</span>
<span class="w">    </span><span class="n">array</span><span class="o">&lt;</span><span class="kt">unsigned</span><span class="p">,</span><span class="w"> </span><span class="n">ndims</span><span class="o">&gt;</span><span class="w"> </span><span class="n">strides</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="c1">// lengths and strides in the form of tuples.</span>
<span class="w">    </span><span class="c1">// If the length along some dimension is known in compile time (N),</span>
<span class="w">    </span><span class="c1">// it is represented as an intergral_constant&lt;int, N&gt;,</span>
<span class="w">    </span><span class="c1">// otherwise as int.</span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="o">&amp;</span><span class="w"> </span><span class="n">native_lengths</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="o">&amp;</span><span class="w"> </span><span class="n">native_strides</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="c1">// 1D length of the data store expressed in number of elements.</span>
<span class="w">    </span><span class="c1">// Namely it is a pointer difference between the last and the first element minus one.</span>
<span class="w">    </span><span class="kt">unsigned</span><span class="w"> </span><span class="nf">length</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="c1">// Supplementary object that holds lengths and strides.</span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="n">info</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="c1">// Request the target view.</span>
<span class="w">    </span><span class="c1">// If the target and host spaces are different necessary synchronization is performed</span>
<span class="w">    </span><span class="c1">// and the host counterpart is marked as dirty.</span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">target_view</span><span class="p">();</span><span class="w"></span>
<span class="w">    </span><span class="c1">// Const version doesn&#39;t mark host counterpart as dirty. Synchronization takes place.</span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">const_target_view</span><span class="p">();</span><span class="w"></span>

<span class="w">    </span><span class="c1">// Raw ptr alternatives for target_view/const_target_view.</span>
<span class="w">    </span><span class="c1">// Synchronization behaviour is the same.</span>
<span class="w">    </span><span class="n">data_t</span><span class="w"> </span><span class="o">*</span><span class="nf">get_target_ptr</span><span class="p">();</span><span class="w"></span>
<span class="w">    </span><span class="n">data_t</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">*</span><span class="n">get_const_target_ptr</span><span class="p">();</span><span class="w"></span>

<span class="w">    </span><span class="c1">// Host access methods variations. They only exist if !std::is_const_v&lt;data_t&gt;.</span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">host_view</span><span class="p">();</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">host_const_view</span><span class="p">();</span><span class="w"></span>
<span class="w">    </span><span class="n">data_t</span><span class="w"> </span><span class="o">*</span><span class="nf">get_host_ptr</span><span class="p">();</span><span class="w"></span>
<span class="w">    </span><span class="n">data_t</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">*</span><span class="n">get_const_host_ptr</span><span class="p">();</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="data-view-synopsis">
<span id="data-view"></span><h4>Data View Synopsis<a class="headerlink" href="#data-view-synopsis" title="Permalink to this heading"></a></h4>
<p>Data view is a supplemental struct that is returned form data store access methods. The distinctive property:
data view is a POD. Hence it can be passed to the target device by copying the memory. For the gpu data stores
all data view methods are declared as device only.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">class</span><span class="w"> </span><span class="nc">T</span><span class="p">,</span><span class="w"> </span><span class="kt">size_t</span><span class="w"> </span><span class="n">N</span><span class="o">&gt;</span><span class="w"></span>
<span class="k">struct</span><span class="w"> </span><span class="nc">some_view</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="c1">// POD members here</span>

<span class="w">    </span><span class="c1">// The meta info methods are the same as for data_store.</span>
<span class="w">    </span><span class="n">array</span><span class="o">&lt;</span><span class="kt">unsigned</span><span class="p">,</span><span class="w"> </span><span class="n">N</span><span class="o">&gt;</span><span class="w"> </span><span class="n">lengths</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="n">array</span><span class="o">&lt;</span><span class="kt">unsigned</span><span class="p">,</span><span class="w"> </span><span class="n">N</span><span class="o">&gt;</span><span class="w"> </span><span class="n">strides</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="o">&amp;</span><span class="w"> </span><span class="n">native_lengths</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="o">&amp;</span><span class="w"> </span><span class="n">native_strides</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="o">&amp;</span><span class="w"></span>
<span class="w">    </span><span class="kt">unsigned</span><span class="w"> </span><span class="n">length</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="n">info</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="c1">// raw access</span>
<span class="w">    </span><span class="n">T</span><span class="w"> </span><span class="o">*</span><span class="nf">data</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="c1">// multi dimensional indexed access</span>
<span class="w">    </span><span class="n">T</span><span class="w"> </span><span class="o">&amp;</span><span class="nf">operator</span><span class="p">()(</span><span class="kt">int</span><span class="p">...</span><span class="w"> </span><span class="cm">/*number of arguments equals to N*/</span><span class="w"> </span><span class="p">)</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="c1">// variation with array as an argument</span>
<span class="w">    </span><span class="n">T</span><span class="w"> </span><span class="o">&amp;</span><span class="nf">operator</span><span class="p">()(</span><span class="n">array</span><span class="o">&lt;</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="n">N</span><span class="o">&gt;</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="p">)</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>On data store synchronization behaviour</p>
<p>If target and host spaces are different and the data is mutable, data store manages both target and host allocations.
Internally it keeps a flag that can be either <code class="docutils literal notranslate"><span class="pre">clean</span></code>, <code class="docutils literal notranslate"><span class="pre">dirty</span> <span class="pre">target</span></code> or <code class="docutils literal notranslate"><span class="pre">dirty</span> <span class="pre">host</span></code>.
When a view is requested and the correspondent allocation is marked as dirty, the data store performs a memory transfer
and the allocation is marked as clean. The counterpart allocation is marked as dirty if a non-constant view is requested.
Each new view request potentially invalidates the previously created views.
Therefore, is is best practice to limit the scope of view objects as much as possible to avoid stale
data views. Here are some illustrations:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">class</span><span class="w"> </span><span class="nc">View</span><span class="o">&gt;</span><span class="w"></span>
<span class="n">__global__</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">kernel</span><span class="p">(</span><span class="n">View</span><span class="w"> </span><span class="n">view</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">view</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">view</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">OxDEADBEEF</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"></span>

<span class="p">...</span><span class="w"></span>

<span class="c1">// host and target allocations are made here. The state is set to clean</span>
<span class="k">auto</span><span class="w"> </span><span class="n">ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">gpu</span><span class="o">&gt;</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="p">().</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">2</span><span class="p">)();</span><span class="w"></span>

<span class="c1">// no memory transfer because of the clean state</span>
<span class="c1">// the state becomes dirty_target</span>
<span class="k">auto</span><span class="w"> </span><span class="n">host_view</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">host_view</span><span class="p">();</span><span class="w"></span>
<span class="c1">// the use of the host view</span>
<span class="n">host_view</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">host_view</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">42</span><span class="p">;</span><span class="w"></span>

<span class="c1">// memory transfer to the target space because the state is dirty_target</span>
<span class="c1">// the state becomes dirty_host</span>
<span class="c1">// host_view becomes stale at this point</span>
<span class="k">auto</span><span class="w"> </span><span class="n">target_view</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">target_view</span><span class="p">();</span><span class="w"></span>

<span class="c1">// the use of the target view</span>
<span class="n">kernel</span><span class="o">&lt;&lt;&lt;</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="o">&gt;&gt;&gt;</span><span class="p">(</span><span class="n">target_view</span><span class="p">);</span><span class="w"></span>

<span class="c1">// memory transfer to the host space because the state is dirty_host</span>
<span class="c1">// the state becomes clean</span>
<span class="c1">// both host_view and target_view are stale at this point</span>
<span class="k">auto</span><span class="w"> </span><span class="n">host_view_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">const_host_view</span><span class="p">();</span><span class="w"></span>
<span class="c1">// the use of the second host view</span>
<span class="n">assert</span><span class="p">(</span><span class="n">host_view_2</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">OxDEADBEEF</span><span class="p">);</span><span class="w"></span>
<span class="n">assert</span><span class="p">(</span><span class="n">host_view_2</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">OxDEADBEEF</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>We can refactor this to exclude the possibility of using state data:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">v</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">host_view</span><span class="p">();</span><span class="w"></span>
<span class="w">    </span><span class="n">v</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">v</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">42</span><span class="p">;</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>

<span class="n">kernel</span><span class="o">&lt;&lt;&lt;</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="o">&gt;&gt;&gt;</span><span class="p">(</span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">target_view</span><span class="p">());</span><span class="w"></span>

<span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">v</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">const_host_view</span><span class="p">();</span><span class="w"></span>
<span class="w">    </span><span class="n">assert</span><span class="p">(</span><span class="n">v</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">OxDEADBEEF</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="n">assert</span><span class="p">(</span><span class="n">v</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">OxDEADBEEF</span><span class="p">);</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>
</pre></div>
</div>
</div>
</section>
</section>
<section id="builder-api">
<span id="id3"></span><h3>Builder API<a class="headerlink" href="#builder-api" title="Permalink to this heading"></a></h3>
<p>The builder design pattern is used for data store construction. The API is defined in <cite>gridtools/storage/builder.hpp</cite>.
Here a single user facing symbol is defined – <code class="docutils literal notranslate"><span class="pre">storage::builder</span></code>.
It is a value template parametrized by <code class="docutils literal notranslate"><span class="pre">Traits</span></code> (see below).
The idea is that the user takes a builder with the desired traits, customize it with requested properties and finally
calls the <code class="docutils literal notranslate"><span class="pre">build()</span></code> method (or alternatively the overloaded call operator) to produce a <code class="docutils literal notranslate"><span class="pre">std::shared_ptr</span></code> to a data store.
For example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="n">ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nn">storage</span><span class="o">::</span><span class="n">builder</span><span class="o">&lt;</span><span class="nn">storage</span><span class="o">::</span><span class="n">gpu</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">double</span><span class="o">&gt;</span><span class="p">()</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">name</span><span class="p">(</span><span class="s">&quot;my special data&quot;</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">132</span><span class="p">,</span><span class="w"> </span><span class="mi">132</span><span class="p">,</span><span class="w"> </span><span class="mi">80</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">halos</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">selector</span><span class="o">&lt;</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="o">&gt;</span><span class="p">()</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">value</span><span class="p">(</span><span class="mi">42</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">build</span><span class="p">();</span><span class="w"></span>

<span class="n">assert</span><span class="p">(</span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">const_host_view</span><span class="p">()(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="mi">42</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>One can also use partially specified builder to produce several data stores:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">my_builder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nn">storage</span><span class="o">::</span><span class="n">builder</span><span class="o">&lt;</span><span class="nn">storage</span><span class="o">::</span><span class="n">gpu</span><span class="o">&gt;</span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">foo</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_builder</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="p">().</span><span class="n">name</span><span class="p">(</span><span class="s">&quot;foo&quot;</span><span class="p">)();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">bar</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_builder</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="n">tuple</span><span class="o">&lt;</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="kt">double</span><span class="o">&gt;&gt;</span><span class="p">()();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">baz</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_builder</span><span class="w"></span>
<span class="w">         </span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">double</span><span class="w"> </span><span class="k">const</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">         </span><span class="p">.</span><span class="n">initialize</span><span class="p">([](</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">j</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">k</span><span class="p">){</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">k</span><span class="p">;</span><span class="w"> </span><span class="p">})</span><span class="w"></span>
<span class="w">         </span><span class="p">.</span><span class="n">build</span><span class="p">();</span><span class="w"></span>
</pre></div>
</div>
<p>This API implements an advanced variation of the builder design pattern. Unlike classic builder, the setters don’t
return a reference <cite>*this</cite>, but a new instance of a potentially different class. Because of that an improper
usage of builder is caught at compile time:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// compilation failure: dimensions should be set</span>
<span class="k">auto</span><span class="w"> </span><span class="n">bad0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">cpu_ifirst</span><span class="o">&gt;</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">double</span><span class="o">&gt;</span><span class="p">().</span><span class="n">build</span><span class="p">();</span><span class="w"></span>

<span class="c1">// compilation failure: value and initialize setters are mutually exclusive</span>
<span class="k">auto</span><span class="w"> </span><span class="n">bad1</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">cpu_ifirst</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">          </span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="p">()</span><span class="w"></span>
<span class="w">          </span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span><span class="w"></span>
<span class="w">          </span><span class="p">.</span><span class="n">value</span><span class="p">(</span><span class="mi">42</span><span class="p">)</span><span class="w"></span>
<span class="w">          </span><span class="p">.</span><span class="n">initialize</span><span class="p">([](</span><span class="kt">int</span><span class="w"> </span><span class="n">i</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">i</span><span class="p">;})</span><span class="w"></span>
<span class="w">          </span><span class="p">.</span><span class="n">build</span><span class="p">();</span><span class="w"></span>
</pre></div>
</div>
<section id="builder-synopsis">
<h4>Builder Synopsis<a class="headerlink" href="#builder-synopsis" title="Permalink to this heading"></a></h4>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="cm">/* Implementation defined parameters. */</span><span class="o">&gt;</span><span class="w"></span>
<span class="k">class</span><span class="w"> </span><span class="nc">buider_type</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">  </span><span class="k">public</span><span class="o">:</span><span class="w"></span>
<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">class</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">type</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">id</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">unknown_id</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="kt">int</span><span class="p">...</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">layout</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="kt">bool</span><span class="p">...</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">selector</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">name</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="p">)</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">dimensions</span><span class="p">(...)</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">halos</span><span class="p">(</span><span class="kt">unsigned</span><span class="p">...)</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">class</span><span class="w"> </span><span class="nc">Fun</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">initializer</span><span class="p">(</span><span class="n">Fun</span><span class="p">)</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">class</span><span class="w"> </span><span class="nc">T</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">value</span><span class="p">(</span><span class="n">T</span><span class="p">)</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="n">build</span><span class="p">()</span><span class="w"> </span><span class="k">const</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">auto</span><span class="w"> </span><span class="k">operator</span><span class="p">()()</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">build</span><span class="p">();</span><span class="w"> </span><span class="p">}</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
<span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">class</span><span class="w"> </span><span class="nc">Traits</span><span class="o">&gt;</span><span class="w"></span>
<span class="k">constexpr</span><span class="w"> </span><span class="n">builder_type</span><span class="o">&lt;</span><span class="cm">/* Implementation defined parameters. */</span><span class="o">&gt;</span><span class="w"> </span><span class="n">builder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{};</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="constrains-on-builder-setters">
<h4>Constrains on Builder Setters<a class="headerlink" href="#constrains-on-builder-setters" title="Permalink to this heading"></a></h4>
<blockquote>
<div><ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">type</span></code> and <code class="docutils literal notranslate"><span class="pre">dimensions</span></code> should be set before calling <code class="docutils literal notranslate"><span class="pre">build</span></code></p></li>
<li><p>any property can be set at most once</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">layout</span></code> and <code class="docutils literal notranslate"><span class="pre">selector</span></code> properties are mutually exclusive</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">value</span></code> and <code class="docutils literal notranslate"><span class="pre">initializer</span></code> properties are mutually exclusive</p></li>
<li><p>the template arity of <code class="docutils literal notranslate"><span class="pre">layout</span></code>/<code class="docutils literal notranslate"><span class="pre">selector</span></code> equals <code class="docutils literal notranslate"><span class="pre">dimension</span></code> arity</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">halos</span></code> arity equals <code class="docutils literal notranslate"><span class="pre">dimension</span></code> arity</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">initializer</span></code> argument is callable with <code class="docutils literal notranslate"><span class="pre">int</span></code> ‘s, has <code class="docutils literal notranslate"><span class="pre">dimension</span></code> arity
and its return type is convertible to <code class="docutils literal notranslate"><span class="pre">type</span></code> argument</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">value</span></code> argument type is convertible to <cite>type</cite> argument.</p></li>
<li><p>if <code class="docutils literal notranslate"><span class="pre">type</span></code> argument is <code class="docutils literal notranslate"><span class="pre">const</span></code>, <code class="docutils literal notranslate"><span class="pre">value</span></code> or <code class="docutils literal notranslate"><span class="pre">initializer</span></code> should be set</p></li>
</ul>
</div></blockquote>
</section>
<section id="notes-on-builder-setters-semantics">
<h4>Notes on Builder Setters Semantics<a class="headerlink" href="#notes-on-builder-setters-semantics" title="Permalink to this heading"></a></h4>
<blockquote>
<div><ul>
<li><p><strong>id:</strong> The use case of setting <code class="docutils literal notranslate"><span class="pre">id</span></code> is to ensure the invariant for <code class="docutils literal notranslate"><span class="pre">data_store::kind_t</span></code>. It should identify
the unique set of dimension sizes. Note the difference: <code class="docutils literal notranslate"><span class="pre">data_store::kind_t</span></code> represents the set of unique <code class="docutils literal notranslate"><span class="pre">strides</span></code>,
but <code class="docutils literal notranslate"><span class="pre">id</span></code> represents the set of unique sizes. Example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// We have two different sizes that we use in our computation.</span>
<span class="c1">// Hence we prepare two partially specified builders.</span>
<span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">builder_a</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">gpu</span><span class="o">&gt;</span><span class="p">.</span><span class="n">id</span><span class="o">&lt;</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="mi">5</span><span class="p">);</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">builder_b</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">gpu</span><span class="o">&gt;</span><span class="p">.</span><span class="n">id</span><span class="o">&lt;</span><span class="mi">1</span><span class="o">&gt;</span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">6</span><span class="p">,</span><span class="w"> </span><span class="mi">7</span><span class="p">);</span><span class="w"></span>

<span class="c1">// We use our builders to make some data_stores.</span>
<span class="k">auto</span><span class="w"> </span><span class="n">a_0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder_a</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">double</span><span class="o">&gt;</span><span class="p">().</span><span class="n">build</span><span class="p">();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">a_1</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder_a</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">double</span><span class="o">&gt;</span><span class="p">().</span><span class="n">build</span><span class="p">();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">a_2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder_a</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">float</span><span class="o">&gt;</span><span class="p">().</span><span class="n">halos</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">).</span><span class="n">build</span><span class="p">();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">b_0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder_a</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">double</span><span class="o">&gt;</span><span class="p">().</span><span class="n">build</span><span class="p">();</span><span class="w"></span>

<span class="c1">// kind_t aliases of a_0 and a_1 are the same.</span>
<span class="c1">// kind_t aliases of a_0 and b_0 are different,</span>
<span class="c1">//   because the id property is different.</span>
<span class="c1">// kind_t aliases of a_0 and a_2 are different,</span>
<span class="c1">//   even though id property is the same,</span>
<span class="c1">//   because types are different.</span>
</pre></div>
</div>
<p>At the moment <code class="docutils literal notranslate"><span class="pre">id</span></code>/<code class="docutils literal notranslate"><span class="pre">kind_t</span></code> matters if data stores are used in the context of <cite>GridTools</cite> stencil computation.
Otherwise there is no need to set <code class="docutils literal notranslate"><span class="pre">id</span></code>. Note also that setting <code class="docutils literal notranslate"><span class="pre">id</span></code> can be skipped if only one set
of dimension sizes is used even in <cite>GridTools</cite> stencil computation context.</p>
</li>
<li><p><strong>unknown_id:</strong> If <code class="docutils literal notranslate"><span class="pre">unknown_id</span></code> is set for the builder, the resulting <code class="docutils literal notranslate"><span class="pre">data_store::kind_t</span></code> will be equal to
<code class="docutils literal notranslate"><span class="pre">sid::unknown_kind</span></code>. This will opt out this data store from the optimizations that are used in the gridtools
stencil computation. it makes sense to set <code class="docutils literal notranslate"><span class="pre">unknown_id</span></code> if the same builder is used to create the data stores with
different dimension set and those fields are participating in the same stencil computation.</p></li>
<li><p><strong>dimensions:</strong> Allows to specify the dimensions of the array. Arguments are either
of integral type or derived from the <code class="docutils literal notranslate"><span class="pre">std::integral_constant</span></code> instantiation. Examples:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">gridtools</span><span class="o">::</span><span class="n">literals</span><span class="p">;</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">my_builder</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">cpu_ifirst</span><span class="o">&gt;</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="p">();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">dynamic_ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_builder</span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">)();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">static_ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_builder</span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">2</span><span class="n">_c</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="n">_c</span><span class="p">)();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">mixed_ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_builder</span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="n">_c</span><span class="p">)();</span><span class="w"></span>
</pre></div>
</div>
<p>In this example all data stores act almost the same way.
But <code class="docutils literal notranslate"><span class="pre">static_ds</span></code> (unlike <code class="docutils literal notranslate"><span class="pre">dynamic_ds</span></code>) does not hold its dimensions in runtime, it
is encoded in the type instead. I.e. meta information (<code class="docutils literal notranslate"><span class="pre">lengths</span></code>/<code class="docutils literal notranslate"><span class="pre">strides</span></code>) takes less space
and also indexing/iterating code can be more aggressively optimized by compiler.</p>
</li>
<li><p><strong>halos:</strong> The memory alignment is controlled by specifying <code class="docutils literal notranslate"><span class="pre">Traits</span></code> (the template parameter of the builder).
By default each first element of the innermost dimension is aligned. <code class="docutils literal notranslate"><span class="pre">halos</span></code> allows to explicitly specify
the index of element that should be aligned. Together with chosen element, all elements that share its
innermost index will be aligned as well.</p></li>
<li><p><strong>selector:</strong> allows to <a class="reference internal" href="../glossary/glossary.html#term-Masked-Dimension"><span class="xref std std-term">mask</span></a> out any dimension or several. Example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="n">ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">cpu_ifirst</span><span class="o">&gt;</span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="p">().</span><span class="n">selector</span><span class="o">&lt;</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">().</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">).</span><span class="n">value</span><span class="p">(</span><span class="mi">-1</span><span class="p">)();</span><span class="w"></span>
<span class="k">auto</span><span class="w"> </span><span class="n">view</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">host_view</span><span class="p">();</span><span class="w"></span>
<span class="c1">// even though the second dimension is masked out</span>
<span class="c1">// we can used indices in the defined range</span>
<span class="n">assert</span><span class="p">(</span><span class="n">ds</span><span class="o">-&gt;</span><span class="n">lengths</span><span class="p">()[</span><span class="mi">1</span><span class="p">],</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span><span class="w"></span>
<span class="n">assert</span><span class="p">(</span><span class="n">view</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="mi">-1</span><span class="p">);</span><span class="w"></span>
<span class="n">assert</span><span class="p">(</span><span class="n">view</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">9</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="mi">-1</span><span class="p">);</span><span class="w"></span>
<span class="c1">// but elements that differs only by the masked out index refer to the same data</span>
<span class="n">assert</span><span class="p">(</span><span class="o">&amp;</span><span class="n">view</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="o">&amp;</span><span class="n">view</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">9</span><span class="p">));</span><span class="w"></span>
</pre></div>
</div>
</li>
<li><p><strong>layout:</strong> By default the data layout is controlled by <code class="docutils literal notranslate"><span class="pre">Traits</span></code>. However it is overridable with
the <code class="docutils literal notranslate"><span class="pre">layout</span></code> setter. Example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="n">ds0</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">builder</span><span class="o">&lt;</span><span class="n">gpu</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="p">.</span><span class="n">type</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="p">()</span><span class="w"></span>
<span class="w">    </span><span class="p">.</span><span class="n">layout</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="o">&gt;</span><span class="p">()</span><span class="w"></span>
<span class="w">    </span><span class="p">.</span><span class="n">dimensions</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">)</span><span class="w"></span>
<span class="w">    </span><span class="p">.</span><span class="n">name</span><span class="p">(</span><span class="s">&quot;my tuned storage for the specific use case&quot;</span><span class="p">)</span><span class="w"></span>
<span class="w">    </span><span class="p">.</span><span class="n">build</span><span class="p">();</span><span class="w"></span>
</pre></div>
</div>
<p>The template parameters of <code class="docutils literal notranslate"><span class="pre">layout</span></code> are a permutation of the value from <code class="docutils literal notranslate"><span class="pre">0</span></code> to <code class="docutils literal notranslate"><span class="pre">N</span> <span class="pre">-</span> <span class="pre">1</span></code>, where <code class="docutils literal notranslate"><span class="pre">N</span></code>
is the number of dimensions of the storage. The values indicate the order of the dimensions by decreasing strides.
For instance a C array <code class="docutils literal notranslate"><span class="pre">X[i][j][k]</span></code> layout would be equivalent to <code class="docutils literal notranslate"><span class="pre">.layout&lt;0,1,2&gt;()</span></code>.
The dimension with stride 1 has the highest index, so in this case, the k-stride is a unit stride. A Fortran style
array <code class="docutils literal notranslate"><span class="pre">X[i][j][k]</span></code> layout would be equivalent to <code class="docutils literal notranslate"><span class="pre">.layout&lt;2,1,0&gt;()</span></code>, meaning that the i-stride is a unit-stride
(thus, the first index of the <code class="docutils literal notranslate"><span class="pre">layout</span></code> is 2).</p>
<p>There is also the possibility to mask dimensions. This means that the storage appears as n-dimensional but
the masked dimensions are ignored. For instance <code class="docutils literal notranslate"><span class="pre">.layout_map&lt;1,-1,0&gt;()</span></code> describes a 3-dimensional storage, where
the i-stride is a unit stride and the j dimension is masked. In this case the storage is allocated as a
two-dimensional array, but it behaves as a three-dimensional array. Accessing the array at <code class="docutils literal notranslate"><span class="pre">(i,</span> <span class="pre">j,</span> <span class="pre">k)</span></code> always
returns the element at <code class="docutils literal notranslate"><span class="pre">(i,</span> <span class="pre">0,</span> <span class="pre">k)</span></code>. This kind of storage can be used two implement oriented planes in stencils.</p>
</li>
</ul>
</div></blockquote>
</section>
</section>
<section id="traits">
<h3>Traits<a class="headerlink" href="#traits" title="Permalink to this heading"></a></h3>
<p>Builder API needs a traits type to instantiate the <code class="docutils literal notranslate"><span class="pre">builder</span></code> object. In order to be used in this context
this type should model the <code class="docutils literal notranslate"><span class="pre">Storage</span> <span class="pre">Traits</span> <span class="pre">Concept</span></code>. The library comes with three predefined traits:</p>
<blockquote>
<div><ul class="simple">
<li><p><strong>cpu_kfirst:</strong> Layout is chosen to benefit from data locality while doing 3D loop.
<code class="docutils literal notranslate"><span class="pre">malloc</span></code> allocation. No alignment. <code class="docutils literal notranslate"><span class="pre">target</span></code> and <code class="docutils literal notranslate"><span class="pre">host</span></code> spaces are the same.</p></li>
<li><p><strong>cpu_ifirst:</strong> Huge page allocation. 8 bytes alignment. Layout is tailored to utilize vectorization while
3D looping. <code class="docutils literal notranslate"><span class="pre">target</span></code> and <code class="docutils literal notranslate"><span class="pre">host</span></code> spaces are the same.</p></li>
<li><p><strong>gpu:</strong> Tailored for GPU. <code class="docutils literal notranslate"><span class="pre">target</span></code> and <code class="docutils literal notranslate"><span class="pre">host</span></code> spaces are different.</p></li>
</ul>
</div></blockquote>
<p>Each traits resides in its own header. Note that <code class="docutils literal notranslate"><span class="pre">builder.hpp</span></code> doesn’t include any specific traits headers.
To use a particular trait the user should include the correspondent header.</p>
<section id="defining-custom-traits">
<h4>Defining Custom Traits<a class="headerlink" href="#defining-custom-traits" title="Permalink to this heading"></a></h4>
<p>To use their own traits, users should provide a type that models the <code class="docutils literal notranslate"><span class="pre">Storage</span> <span class="pre">Traits</span> <span class="pre">Concept</span></code>. There is no need
to place a custom traits within <cite>GridTools</cite> source tree. The concept is ADL-based. The easiest way to go
is to copy any of predefined traits and modify it. Skipping some details the concept is defined as follows:</p>
<blockquote>
<div><ul>
<li><p>traits must specify if the <code class="docutils literal notranslate"><span class="pre">target</span></code> and <code class="docutils literal notranslate"><span class="pre">host</span></code> memory spaces are the same by providing
a <code class="docutils literal notranslate"><span class="pre">storage_is_host_referenceable</span></code> ADL-based overload function.</p></li>
<li><p>traits must specify alignment in bytes by defining a <code class="docutils literal notranslate"><span class="pre">storage_alignment</span></code> function.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">storage_allocate</span></code> function must be defined to say the library how to target memory is allocated.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">storage_layout</span></code> function is needed to define the layout_map for a given number of dimensions.</p></li>
<li><p>if <code class="docutils literal notranslate"><span class="pre">target</span></code> and <code class="docutils literal notranslate"><span class="pre">host</span></code> memory spaces are different:</p>
<blockquote>
<div><ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">storage_update_target</span></code> function is needed to define how to move the data from host to target.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">storage_update_host</span></code> function is needed to define how to move the data from target to host.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">storage_make_target_view</span></code> function is needed to define a target view.</p></li>
</ul>
</div></blockquote>
</li>
</ul>
</div></blockquote>
</section>
</section>
<section id="sid-concept-adaptation">
<h3>SID Concept Adaptation<a class="headerlink" href="#sid-concept-adaptation" title="Permalink to this heading"></a></h3>
<p>The <a class="reference internal" href="#stencil-composition"><span class="std std-ref">Stencil Composition</span></a> Library doesn’t use the Storage Library directly.
Instead the <code class="docutils literal notranslate"><span class="pre">SID</span> <span class="pre">Concept</span></code> is used to specify the requirements on input/output fields.
<code class="docutils literal notranslate"><span class="pre">Data</span> <span class="pre">store</span></code> models <code class="docutils literal notranslate"><span class="pre">SID</span></code> if the <code class="docutils literal notranslate"><span class="pre">gridtools/storage/sid.hpp</span></code> header is included.</p>
</section>
</section>
<section id="stencil-operators">
<span id="id4"></span><h2>Stencil Operators<a class="headerlink" href="#stencil-operators" title="Permalink to this heading"></a></h2>
<p><em>Stencil operators</em> are the <cite>GridTools</cite>-equivalent of <cite>functors</cite> in regular C++ code.  They are assumed to have no
side-effects and no status (which is why they are marked as <cite>static</cite>). As functions they have an <cite>interface</cite> and an
<cite>implementation</cite>. The interface informs the caller, on the order and types of arguments that have to be passed to
it, and the implementation, on the names and types of the symbols available to it.</p>
<p>The stencil operator specifies the computation to be performed in each grid point of the <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a> of the
stencil <a class="reference internal" href="../glossary/glossary.html#term-Computation"><span class="xref std std-term">Computation</span></a>. In the implementation, a point of the <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a> at which the stencil operator
is called is referred to as <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Point"><span class="xref std std-term">Iteration Point</span></a>.</p>
<p>A stencil operator is a <cite>class</cite>, or a <cite>struct</cite>, with the following public properties:</p>
<ul class="simple">
<li><p>A list of <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessor</span></a> types that are associated to the data fields the stencil operator will access
in its implementation.</p></li>
<li><p>A <code class="docutils literal notranslate"><span class="pre">param_list</span></code> listing all the <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessor</span></a> types defined above. They are created using <code class="docutils literal notranslate"><span class="pre">make_param_list</span></code>.</p></li>
<li><p>A set of <em>static template member functions</em> named <code class="docutils literal notranslate"><span class="pre">apply</span></code>, also referred to as <a class="reference internal" href="../glossary/glossary.html#term-Apply-Method"><span class="xref std std-term">Apply-Methods</span></a>.
These functions should be annotated with <code class="docutils literal notranslate"><span class="pre">GT_FUNCTION</span></code> which ensures that the functions can be run on GPU and that
they are inlined.</p></li>
</ul>
<p>The user may add additional static functions for internal usage within the stencil operator.</p>
<p>See the <a class="reference internal" href="#stencil-operator-example"><span class="std std-ref">Example</span></a> for a concrete usage of the syntax of the stencil operators.</p>
<section id="accessor-type">
<h3>Accessor Type<a class="headerlink" href="#accessor-type" title="Permalink to this heading"></a></h3>
<p><a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">accessors</span></a> indicate an access to a <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Store</span></a> of a <a class="reference internal" href="../glossary/glossary.html#term-Grid"><span class="xref std std-term">Grid</span></a>.</p>
<p>Accessors are defined as follows:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">accessor</span><span class="o">&lt;</span><span class="n">I</span><span class="p">,</span><span class="w"> </span><span class="k">intent</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="k">extent</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">extent</span><span class="o">&lt;&gt;</span><span class="p">,</span><span class="w"> </span><span class="n">N</span><span class="p">]</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>where</p>
<ul>
<li><p><code class="docutils literal notranslate"><span class="pre">name</span></code> is the name associated to the <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessor</span></a> and will be used in the implementation of the stencil
operator. The name of an accessor is only used within the stencil.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">I</span></code> is an integral index. The indices of the <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessors</span></a> in a given stencil operators <em>must</em> be
in the range from 0 to N - 1, where N is the number of <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessors</span></a> used by the stencil operator. No
index can appear twice. If these rules are not followed the compilation fails.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">intent</span></code> indicates the type of access the stencil operator makes to the data associated to the <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessor</span></a>.
Possible values are</p>
<ol class="arabic simple">
<li><p><code class="docutils literal notranslate"><span class="pre">intent::in</span></code> to specify <em>read-only</em> access</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">intent::inout</span></code> to specify <em>read-write</em> access. The <code class="docutils literal notranslate"><span class="pre">extent</span></code> for <cite>i</cite> and <cite>j</cite> for <code class="docutils literal notranslate"><span class="pre">inout</span></code> must be made of all
zeros (see next points)</p></li>
</ol>
<p>Alternatively, the abbreviations <code class="docutils literal notranslate"><span class="pre">in_accessor</span></code> our <code class="docutils literal notranslate"><span class="pre">inout_accessor</span></code> can be used respectively.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">read_only_accessor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">in_accessor</span><span class="o">&lt;</span><span class="n">I</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="k">extent</span><span class="p">,</span><span class="w"> </span><span class="n">N</span><span class="p">]</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
<span class="k">using</span><span class="w"> </span><span class="n">read_write_accessor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">inout_accessor</span><span class="o">&lt;</span><span class="n">I</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="k">extent</span><span class="p">,</span><span class="w"> </span><span class="n">N</span><span class="p">]</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
</li>
<li><p><code class="docutils literal notranslate"><span class="pre">extent</span></code> defines the maximum offsets at which the implementation will access data around the <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Point"><span class="xref std std-term">Iteration
Point</span></a>. They are defined as follows:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">extent</span><span class="o">&lt;</span><span class="n">i_minus</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="n">i_plus</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="n">j_minus</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="n">j_plus</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="n">k_minus</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="n">k_plus</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>An extent takes three pairs of numbers, one pair for each dimension of the iteration space. The first number of the
pair must be non-positive and indicates the maximum offset in the direction of <em>decreasing</em> indices (also called minus
direction*). The second number must be non-negative and indicates the maximum offset in the direction of
<em>increasing</em> indices (also called plus direction*). Numbers can be ommitted and default to zero.</p>
<p>Example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">my_accessor</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">inout_accessor</span><span class="o">&lt;</span><span class="n">I</span><span class="p">,</span><span class="w"> </span><span class="k">extent</span><span class="o">&lt;</span><span class="mi">-1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>This accessor guarantees that at most one element in negative and positive i-direction will be accessed (i.e. we will
never access <code class="docutils literal notranslate"><span class="pre">field(i</span> <span class="pre">+</span> <span class="pre">2)</span></code>). Further, it guarantees that in j-direction, no elements in negative and at most two
elements in positive direction will be accessed. In k-direction, the field is not accessed with any offset.</p>
<p>Note that <code class="docutils literal notranslate"><span class="pre">extent&lt;&gt;</span></code> is a valid extent and indicates that the field is always accessed at the iteration point.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Behaviour is undefined if a field is accessed at extents that are outside the box defined by <code class="docutils literal notranslate"><span class="pre">extent</span></code>. Under
certain configuration this might lead to erroneous results. Extents bigger than the ones actually accessed by the
implementation will potentially result in performance loss.</p>
</div>
</li>
<li><p><code class="docutils literal notranslate"><span class="pre">N</span></code> identifies the dimension of the offsets that are used with the given accessor. The default is calculated from
the <cite>extent&lt;&gt;</cite> parameter.</p></li>
</ul>
</section>
<section id="parameter-list">
<h3>Parameter list<a class="headerlink" href="#parameter-list" title="Permalink to this heading"></a></h3>
<p>The parameter list <code class="docutils literal notranslate"><span class="pre">param_list</span></code> is defined as follows:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="k">param_list</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">make_param_list</span><span class="o">&lt;</span><span class="n">accessors</span><span class="p">...</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>where <code class="docutils literal notranslate"><span class="pre">accessors...</span></code> is a comma separated list of all the <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessors</span></a> specified before. For example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">in_</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">in_accessor</span><span class="o">&lt;</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
<span class="k">using</span><span class="w"> </span><span class="n">out_</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">inout_accessor</span><span class="o">&lt;</span><span class="mi">1</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>

<span class="k">using</span><span class="w"> </span><span class="k">param_list</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">make_param_list</span><span class="o">&lt;</span><span class="n">in_</span><span class="p">,</span><span class="w"> </span><span class="n">out_</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Specifying the parameter list is mandatory because C++ cannot infer what types have been defined as accessors.</p>
</div>
</section>
<section id="apply-method">
<span id="stencil-operators-apply-method"></span><h3><cite>Apply</cite>-Method<a class="headerlink" href="#apply-method" title="Permalink to this heading"></a></h3>
<p>A stencil operator can have several <code class="docutils literal notranslate"><span class="pre">apply</span></code>-methods, defining the functors to be applied at different <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical
Intervals</span></a>. An <code class="docutils literal notranslate"><span class="pre">apply</span></code> method takes at most two arguments:</p>
<ol class="arabic simple">
<li><p>The first argument is a templated parameter usually called <code class="docutils literal notranslate"><span class="pre">eval</span></code> that holds internal information.</p></li>
<li><p>The second arguments specifies the Vertical Interval to which functor is applied to. If the grid is not created with
an axis, but only with a vertical size, this argument can be skipped and the stencil is applied to the whole axis.
Whenever the grid is created using an axis, the user should specify a vertical interval for each method.</p></li>
</ol>
<p>Example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">Eval</span><span class="o">&gt;</span><span class="w"></span>
<span class="cp">GT_FUNCTION</span><span class="w"> </span><span class="k">static</span><span class="w"></span>
<span class="kt">void</span><span class="w"> </span><span class="n">apply</span><span class="p">(</span><span class="n">Eval</span><span class="w"> </span><span class="k">const</span><span class="o">&amp;</span><span class="w"> </span><span class="n">eval</span><span class="p">,</span><span class="w"> </span><span class="n">region</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>Within an <code class="docutils literal notranslate"><span class="pre">apply</span></code>-method, data bound to the accessors can be accessed through the <code class="docutils literal notranslate"><span class="pre">eval</span></code> argument.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">eval</span><span class="p">(</span><span class="n">accessor_name</span><span class="p">())</span><span class="w"></span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Make sure to add the parentheses after the accessor.</p>
</div>
<p>The previous syntax will evaluate the accessor at the iteration point. Values can be accessed at offsets relative to the
evaluation point by passing a sequence of integral indices to the accessor:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">eval</span><span class="p">(</span><span class="n">accessor_name</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">-1</span><span class="p">))</span><span class="w"></span>
</pre></div>
</div>
<p>This accesses an element at an offset of 1 in the first dimension (plus direction) of the <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Point"><span class="xref std std-term">Iteration Point</span></a>, and
an offset of 1 in the minus direction in the third dimension. A way to think of it is to consider the point of
evaluation as a triplet <code class="docutils literal notranslate"><span class="pre">i</span></code>, <code class="docutils literal notranslate"><span class="pre">j</span></code> and <code class="docutils literal notranslate"><span class="pre">k</span></code>, and those offsets are added to the current index coordinates to
identifying the actual value to access. The evaluation returns a reference to the value for accessors with
<code class="docutils literal notranslate"><span class="pre">inout</span></code>-intent, and a const reference for <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessors</span></a> with <code class="docutils literal notranslate"><span class="pre">in</span></code>-intent.</p>
<p>The next example calculates the average of two neighbored values in the first dimension and assign it to the output
field:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">())</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)))</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="mi">2</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<ol class="arabic">
<li><p>Writing into non-zero offsets is not allowed.</p></li>
<li><p>Stencil operators must not have any horizontal dependencies within itself. That means: If a stencil operator writes
into a field, it must not access this field with non-zero horizontal offset.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">));</span><span class="w"> </span><span class="c1">// ok, if this stage does not write to in</span>
<span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">));</span><span class="w"> </span><span class="c1">// undefined!</span>
<span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">));</span><span class="w"> </span><span class="c1">// undefined, if execution policy is parallel (see stencil composition sections)</span>
</pre></div>
</div>
</li>
</ol>
</div>
</section>
<section id="example">
<span id="stencil-operator-example"></span><h3>Example<a class="headerlink" href="#example" title="Permalink to this heading"></a></h3>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">struct</span><span class="w"> </span><span class="nc">flx_function</span><span class="w"> </span><span class="p">{</span><span class="w"></span>

<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">inout_accessor</span><span class="o">&lt;</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">in</span><span class="w">  </span><span class="o">=</span><span class="w"> </span><span class="k">in_accessor</span><span class="o">&lt;</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="k">extent</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="o">&gt;&gt;</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">lap</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">in_accessor</span><span class="o">&lt;</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="k">extent</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="o">&gt;&gt;</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="k">param_list</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">make_param_list</span><span class="o">&lt;</span><span class="n">out</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="n">lap</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">Evaluation</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="cp">GT_FUNCTION</span><span class="w"> </span><span class="k">static</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">apply</span><span class="p">(</span><span class="n">Evaluation</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="n">eval</span><span class="p">,</span><span class="w"> </span><span class="n">full_interval</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">lap</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">))</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">lap</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">));</span><span class="w"></span>
<span class="w">        </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="p">(</span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">))</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)))</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">            </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">0.</span><span class="p">;</span><span class="w"></span>
<span class="w">        </span><span class="p">}</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="expressions">
<h3>Expressions<a class="headerlink" href="#expressions" title="Permalink to this heading"></a></h3>
<p>Multiple calls to eval can be merged into one when the <code class="docutils literal notranslate"><span class="pre">expressions</span></code>-namespace is imported. This is possible, because
calculations with accessors produce expressions that can be evaluated. Readability can be greatly improved, but it might
have negative impact on compilation time.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">Evaluation</span><span class="o">&gt;</span><span class="w"></span>
<span class="cp">GT_FUNCTION</span><span class="w"> </span><span class="k">static</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">apply</span><span class="p">(</span><span class="n">Evaluation</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="n">eval</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="k">namespace</span><span class="w"> </span><span class="nn">stencil</span><span class="o">::</span><span class="nn">cartesian</span><span class="o">::</span><span class="nn">expressions</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">lap</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">lap</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">));</span><span class="w"></span>
<span class="w">    </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">()</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">in</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">))</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">0.</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>
</pre></div>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">expressions</span></code>-namespace has overloads for common operations on accessors, namely <code class="docutils literal notranslate"><span class="pre">+</span></code>, <code class="docutils literal notranslate"><span class="pre">-</span></code>, <code class="docutils literal notranslate"><span class="pre">*</span></code>, <code class="docutils literal notranslate"><span class="pre">/</span></code>,
<code class="docutils literal notranslate"><span class="pre">pow&lt;&gt;</span></code>. Using those operations with accessors creates an expression that can be evaluated using <code class="docutils literal notranslate"><span class="pre">eval</span></code>.</p>
<p>Note that those expressions can also be used to lazily evaluate expressions. This provides a way to reuse expressions in
your code:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="k">namespace</span><span class="w"> </span><span class="nn">expressions</span><span class="p">;</span><span class="w"></span>
<span class="k">constexpr</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">cond</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">out</span><span class="p">()</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">in</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">);</span><span class="w"></span>
<span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">Evaluation</span><span class="o">&gt;</span><span class="w"></span>
<span class="cp">GT_FUNCTION</span><span class="w"> </span><span class="k">static</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">apply</span><span class="p">(</span><span class="n">Evaluation</span><span class="w"> </span><span class="o">&amp;</span><span class="n">eval</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">lap</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">lap</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">));</span><span class="w"></span>
<span class="w">    </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">eval</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mi">0</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">0.</span><span class="p">;</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>
</pre></div>
</div>
</section>
</section>
<section id="execution-model">
<span id="id5"></span><h2>Execution Model<a class="headerlink" href="#execution-model" title="Permalink to this heading"></a></h2>
<p>Stencil operations are executed in a three dimensional <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a>. The first two dimensions of the
iteration space, usually referred to as <cite>I</cite> and <cite>J</cite> dimensions, identify the horizontal dimension. There is no
prescription for the order of stencil operators in different points of the same <cite>IJ</cite> plane. Stencil operators in the
third dimension of the iteration space, usually referred as <cite>K</cite> or vertical dimension, can have prescribed order
of executions. There are three different execution policies for the <cite>K</cite> dimension:</p>
<ul class="simple">
<li><p><cite>forward</cite>: The computation at index <cite>k</cite> in the vertical dimension is executed after index <cite>k - 1</cite> for all points in
the horizontal plane;</p></li>
<li><p><cite>backward</cite>: The computation at index <cite>k</cite> in the vertical dimension is executed after index <cite>k + 1</cite> for all points in
the horizontal plane;</p></li>
<li><p><cite>parallel</cite>: No order is specified and execution can happen concurrently.</p></li>
</ul>
<p>More concretely, a multistage is a list of stages, implemented with stencil operators, to be executed with a certain
execution policy. A computation combines several multistages and executes one multistage after the other.</p>
<ul class="simple">
<li><p>For each <cite>IJ</cite> plane, the stages of a multistage will be executed strictly one after the other. This means that a
stage can assume that the previous stage has been applied to the the whole <cite>IJ</cite> plane before its execution. The user
can explicitly create independent stages that do not require this restriction.</p></li>
<li><p>If the execution policy is <cite>parallel</cite>, a stage cannot impose any assumptions on which stages are applied before in
another <cite>IJ</cite> plane.</p></li>
<li><p>If the execution policy is <cite>forward</cite>, it is guaranteed that if a stage is executed at index <code class="docutils literal notranslate"><span class="pre">k</span></code> then all stages of
the multistage were already applied to the same column with smaller <code class="docutils literal notranslate"><span class="pre">k</span></code>. There is no guarantee that previous stages
of the multistage have not already been applied to the indices in the same column with larger <code class="docutils literal notranslate"><span class="pre">k</span></code>.</p></li>
<li><p>If the execution policy is <cite>backward</cite>, it is guaranteed that if a stage is executed at index <code class="docutils literal notranslate"><span class="pre">k</span></code> then all stages of
the multistage were already applied to the same column with larger <code class="docutils literal notranslate"><span class="pre">k</span></code>. Similarly, there is no guarantee that
previous stages of the multistage have not already been applied to the indices in the same column with smaller <code class="docutils literal notranslate"><span class="pre">k</span></code>.</p></li>
</ul>
<section id="extended-compute-domain-of-a-stage">
<h3>Extended Compute Domain of a Stage<a class="headerlink" href="#extended-compute-domain-of-a-stage" title="Permalink to this heading"></a></h3>
<p>For a clear specification of access restrictions, we need to introduce the concept of extended compute domains.</p>
<p>If a stage is accessing neighbor grid points of field <cite>a</cite>, the computation will consume halo points at the boundary of the compute domain.
In case field <cite>a</cite> was written in a previous stage of the same multi-stage, the compute domain of the first stage needs to be extended, such
that the <em>extended compute domain</em> of the stage covers the extent of the read access in the later stage.</p>
</section>
<section id="access-restrictions">
<h3>Access restrictions<a class="headerlink" href="#access-restrictions" title="Permalink to this heading"></a></h3>
<p>The execution model imposes restrictions on how accessors can be evaluated. These restrictions are not checked by GridTools,
violating these rules results in undefined behavior.</p>
<p>Definitions:</p>
<blockquote>
<div><ul class="simple">
<li><p><em>Writing/reading to/from an accessor</em> is short for <em>writing/reading to/from a field through an accessor</em>.
Note that the same field could be bound to different accessors, though this is not recommended.</p></li>
</ul>
</div></blockquote>
<ul class="simple">
<li><p><em>Previous/later statement</em> is short for <em>previous/later statement in the same stage or any previous/later stage</em>.</p></li>
</ul>
<p>The following rules apply to accesses in stages <em>within the same multi-stage</em>:</p>
<blockquote>
<div><ol class="arabic">
<li><p>A stage may read from an accessor unconditionally, if the same accessor is never written to.</p></li>
<li><p>Within a computation an accessor must be written only in a single stage. Within this stage, the accessor must not be read with an horizontal offset.</p></li>
<li><p>A stage may write to an accessor, if there is no read from the same accessor in a previous statement. (Read without horizontal offsets after write is allowed.)</p></li>
<li><p>A stage may read from an accessor and write to it in a later statement, if</p>
<ol class="loweralpha simple">
<li><p>all reads of this accessor are in stages where the compute domain is not extended;</p></li>
<li><p>all reads of this accessor are without offsets.</p></li>
</ol>
<p>(Write after read only, if read is in non-extended stages and no offsets are used.)</p>
</li>
<li><p>Temporaries are not restricted by 4, i.e. a stage may write to an accessor after read in a previous stage of the same accessor, unconditionally.
Restriction 2 still applies. Note that read before write is only useful, if the temporary was written in a previous multi-stage.</p></li>
</ol>
</div></blockquote>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Higher level tools generating GridTools code, may choose to ignore rule 2 and use <cite>stage_with_extent()</cite> instead.</p>
</div>
</section>
<section id="examples-for-access-restrictions">
<h3>Examples for Access Restrictions<a class="headerlink" href="#examples-for-access-restrictions" title="Permalink to this heading"></a></h3>
<p>In the following we show only the statements inside a stage, correct extents (matching the offset in the accessors are assumed).
All stages are assumed to be in the same multi-stage.</p>
<blockquote>
<div><div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// stage 0</span>
<span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"></span>

<span class="c1">// stage 1</span>
<span class="n">eval</span><span class="p">(</span><span class="n">b</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">(</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">));</span><span class="w"> </span><span class="c1">// ok, read after write</span>
</pre></div>
</div>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// stage 0</span>
<span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"></span>

<span class="c1">// stage 1</span>
<span class="n">eval</span><span class="p">(</span><span class="n">b</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">());</span><span class="w"></span>
<span class="n">eval</span><span class="p">(</span><span class="n">c</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"></span>

<span class="c1">// stage 2</span>
<span class="n">eval</span><span class="p">(</span><span class="n">d</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">c</span><span class="p">(</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">));</span><span class="w"> </span><span class="c1">// ok, read after write everywhere</span>
</pre></div>
</div>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// stage 0</span>
<span class="n">eval</span><span class="p">(</span><span class="n">b</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">());</span><span class="w"></span>

<span class="c1">// stage 1 (extended domain because of read from c in stage 2)</span>
<span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"> </span><span class="c1">// error, violating rule 4a</span>
<span class="n">eval</span><span class="p">(</span><span class="n">c</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"></span>

<span class="c1">// stage 2</span>
<span class="n">eval</span><span class="p">(</span><span class="n">d</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">c</span><span class="p">(</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">));</span><span class="w"></span>
</pre></div>
</div>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// stage 0</span>
<span class="n">eval</span><span class="p">(</span><span class="n">b</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">(</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">));</span><span class="w"></span>

<span class="c1">// stage 1</span>
<span class="n">eval</span><span class="p">(</span><span class="n">a</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"> </span><span class="c1">// error, violating rule 4b</span>
</pre></div>
</div>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// stage 0</span>
<span class="n">eval</span><span class="p">(</span><span class="n">b</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">tmp</span><span class="p">(</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">));</span><span class="w"></span>

<span class="c1">// stage 1</span>
<span class="n">eval</span><span class="p">(</span><span class="n">tmp</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"> </span><span class="c1">// ok, see rule 5, temporaries don&#39;t violate rule 4</span>
</pre></div>
</div>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">// stage 0</span>
<span class="n">eval</span><span class="p">(</span><span class="n">tmp_a</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_value</span><span class="p">;</span><span class="w"></span>

<span class="c1">// stage 1</span>
<span class="n">eval</span><span class="p">(</span><span class="n">tmp_b</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">tmp_a</span><span class="p">());</span><span class="w"></span>

<span class="c1">// stage 2</span>
<span class="n">eval</span><span class="p">(</span><span class="n">tmp_a</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">some_other_value</span><span class="p">;</span><span class="w"> </span><span class="c1">// error, violating rule 2</span>
</pre></div>
</div>
</div></blockquote>
</section>
</section>
<section id="stencil-composition">
<span id="id6"></span><h2>Stencil Composition<a class="headerlink" href="#stencil-composition" title="Permalink to this heading"></a></h2>
<section id="defining-the-iteration-space-the-grid">
<span id="defining-iteration-space"></span><h3>Defining the Iteration Space: the Grid<a class="headerlink" href="#defining-the-iteration-space-the-grid" title="Permalink to this heading"></a></h3>
<p>The <a class="reference internal" href="../glossary/glossary.html#term-Stencil-Operator"><span class="xref std std-term">Stencil Operators</span></a> describe operations on a single <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Point"><span class="xref std std-term">Iteration Point</span></a>.
The <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a> defines on which points the operator should be applied. In this section
we cover how to define the <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a> with the <code class="docutils literal notranslate"><span class="pre">grid</span></code> object.</p>
<p><cite>GridTools</cite> offers a set of functions which ease the construction of the <code class="docutils literal notranslate"><span class="pre">grid</span></code>:</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv49make_gridiii">
<span id="_CPPv39make_gridiii"></span><span id="_CPPv29make_gridiii"></span><span id="make_grid__i.i.i"></span><span class="kt"><span class="pre">auto</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">make_grid</span></span></span><span class="sig-paren">(</span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">size_i</span></span>, <span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">size_j</span></span>, <span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">size_k</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv49make_gridiii" title="Permalink to this definition"></a><br /></dt>
<dd><p>The simplest <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a> will iterate the cube defined by the <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Intervals</span></a> <code class="docutils literal notranslate"><span class="pre">[0,</span>
<span class="pre">size_i-1]</span></code>, <code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">size_j-1]</span></code>, <code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">size_k-1]</span></code>. This function must only be used if all stages used within the
computation have zero extents.</p>
</dd></dl>

<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv49make_grid15halo_descriptor15halo_descriptori">
<span id="_CPPv39make_grid15halo_descriptor15halo_descriptori"></span><span id="_CPPv29make_grid15halo_descriptor15halo_descriptori"></span><span id="make_grid__halo_descriptor.halo_descriptor.i"></span><span class="kt"><span class="pre">auto</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">make_grid</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">halo_descriptor</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">halo_i</span></span>, <span class="n"><span class="pre">halo_descriptor</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">halo_j</span></span>, <span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">size_z</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv49make_grid15halo_descriptor15halo_descriptori" title="Permalink to this definition"></a><br /></dt>
<dd><p>For finer control of the iteration space a <a class="reference internal" href="#halo-descriptor"><span class="std std-ref">Halo Descriptor</span></a> can be passed for the horizontal directions (<code class="docutils literal notranslate"><span class="pre">I</span></code>
and <code class="docutils literal notranslate"><span class="pre">J</span></code>).  The 3rd and 4th argument of the <code class="docutils literal notranslate"><span class="pre">halo_descriptor</span></code> define the start and the endpoint of the
<a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a>. Note that the minus (first argument) and plus (second argument) of the <code class="docutils literal notranslate"><span class="pre">halo_descriptor</span></code>
should be larger than the maximum extent of the whole computation.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="n">grid</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nl">make_grid</span><span class="p">({</span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">},</span><span class="w"> </span><span class="p">{</span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">20</span><span class="p">,</span><span class="w"> </span><span class="mi">30</span><span class="p">},</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>This example will create a grid. The iteration space in <code class="docutils literal notranslate"><span class="pre">i</span></code> and <code class="docutils literal notranslate"><span class="pre">j</span></code> will be <code class="docutils literal notranslate"><span class="pre">[10,</span> <span class="pre">20]</span></code> (including <code class="docutils literal notranslate"><span class="pre">20</span></code>!). The
computation is required not to access data outside of <code class="docutils literal notranslate"><span class="pre">[7,</span> <span class="pre">23]</span></code>. The iteration space in <code class="docutils literal notranslate"><span class="pre">k</span></code> is <code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">9]</span></code>.</p>
</dd></dl>

<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv49make_gridii4Axis">
<span id="_CPPv39make_gridii4Axis"></span><span id="_CPPv29make_gridii4Axis"></span><span id="make_grid__i.i.Axis"></span><span class="kt"><span class="pre">auto</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">make_grid</span></span></span><span class="sig-paren">(</span><span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">size_i</span></span>, <span class="kt"><span class="pre">int</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">size_j</span></span>, <span class="n"><span class="pre">Axis</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">axis</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv49make_gridii4Axis" title="Permalink to this definition"></a><br /></dt>
<dd><p>The vertical axis needs to be passed to the grid when using several vertical regions. The axis can be constructed by
passing it the size of each of the vertical regions. Details follow in the coming sections.</p>
</dd></dl>

<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv49make_grid15halo_descriptor15halo_descriptor4Axis">
<span id="_CPPv39make_grid15halo_descriptor15halo_descriptor4Axis"></span><span id="_CPPv29make_grid15halo_descriptor15halo_descriptor4Axis"></span><span id="make_grid__halo_descriptor.halo_descriptor.Axis"></span><span class="kt"><span class="pre">auto</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">make_grid</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">halo_descriptor</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">halo_i</span></span>, <span class="n"><span class="pre">halo_descriptor</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">halo_j</span></span>, <span class="n"><span class="pre">Axis</span></span><span class="w"> </span><span class="n sig-param"><span class="pre">axis</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv49make_grid15halo_descriptor15halo_descriptor4Axis" title="Permalink to this definition"></a><br /></dt>
<dd><p>See explanations in other functions.</p>
</dd></dl>

</section>
<section id="vertical-regions-and-vertical-boundary-conditions">
<span id="vertical-regions"></span><h3>Vertical Regions and Vertical Boundary Conditions<a class="headerlink" href="#vertical-regions-and-vertical-boundary-conditions" title="Permalink to this heading"></a></h3>
<p>The <cite>GridTools</cite> <a class="reference internal" href="../glossary/glossary.html#term-Execution-Model"><span class="xref std std-term">Execution Model</span></a> allows to be sequential in the vertical dimension (<code class="docutils literal notranslate"><span class="pre">k</span></code>). Additionally, <cite>GridTools</cite>
offers the possibility to split the vertical dimension into vertical regions,
where stencils can perform different operations. Typical applications of this pattern are models which define
terrain-following coordinates close to the earth surface and flat-coordinates in upper
levels of the atmosphere. Another use-case are vertical boundary-conditions which can
be directly integrated into the stencil operation.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>In the following we will distinguish two concepts: first <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Region"><span class="xref std std-term">Vertical Regions</span></a> are
non-overlapping subsets of the vertical <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a> with run-time defined sizes; second <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical
Intervals</span></a> (or just <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Interval</span></a>) are compile-time defined descriptions
from one vertical level (not every vertical level can be selected, see below) to another.</p>
</div>
<section id="default-interval">
<h4>Default Interval<a class="headerlink" href="#default-interval" title="Permalink to this heading"></a></h4>
<p>In simple applications, where all vertical levels should be treated equally, <cite>GridTools</cite> allows to use a default
<a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Interval</span></a> which covers the full vertical region. In this and only this case the apply methods of the stencil
operators should be defined without specifying an <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Interval</span></a> (see Section
<a class="reference internal" href="#stencil-operators-apply-method"><span class="std std-ref">Apply-Methods</span></a>) and the iteration space should be created using one of the simple
constructors in the <a class="reference internal" href="#defining-iteration-space"><span class="std std-ref">previous section</span></a> (namely, either <code class="docutils literal notranslate"><span class="pre">make_grid(int,</span> <span class="pre">int,</span> <span class="pre">int)</span></code>, or
<code class="docutils literal notranslate"><span class="pre">make_grid(halo_descriptor,</span> <span class="pre">halo_descriptor,</span> <span class="pre">int)</span></code>.</p>
</section>
<section id="defining-vertical-intervals">
<h4>Defining Vertical Intervals<a class="headerlink" href="#defining-vertical-intervals" title="Permalink to this heading"></a></h4>
<p><cite>GridTools</cite> allows to split the full vertical iteration space into regions. The number of vertical regions needs to be specified at
compile-time, while the size of each region can be defined at run-time.</p>
<p>For defining a computation with more than a single vertical region we need to define an <code class="docutils literal notranslate"><span class="pre">axis</span></code> first.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">my_axis_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">axis</span><span class="o">&lt;</span><span class="n">N</span><span class="o">&gt;</span><span class="w"></span>
</pre></div>
</div>
<p>where <cite>N</cite> describes the number of vertical regions.</p>
<p>At runtime the axis is instantiated with the sizes of each region,</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">my_axis_t</span><span class="w"> </span><span class="n">my_axis</span><span class="p">{</span><span class="n">N0</span><span class="p">,</span><span class="w"> </span><span class="n">N1</span><span class="p">,</span><span class="w"> </span><span class="n">N2</span><span class="p">,</span><span class="w"> </span><span class="p">...};</span><span class="w"></span>
</pre></div>
</div>
<p>where the <code class="docutils literal notranslate"><span class="pre">Nx</span></code> are the sizes of region <code class="docutils literal notranslate"><span class="pre">x</span></code>. With our axis object we can now generate a grid with one of the following
signatures</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">grid</span><span class="w"> </span><span class="nl">make_grid</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">size_i</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">size_j</span><span class="p">,</span><span class="w"> </span><span class="k">axis</span><span class="o">&lt;</span><span class="n">N</span><span class="o">&gt;</span><span class="w"> </span><span class="n">my_axis</span><span class="p">)</span><span class="w"></span>
<span class="n">grid</span><span class="w"> </span><span class="nl">make_grid</span><span class="p">(</span><span class="k">halo_descriptor</span><span class="w"> </span><span class="n">halo_i</span><span class="p">,</span><span class="w"> </span><span class="k">halo_descriptor</span><span class="w"> </span><span class="n">halo_j</span><span class="p">,</span><span class="w"> </span><span class="k">axis</span><span class="o">&lt;</span><span class="n">N</span><span class="o">&gt;</span><span class="w"> </span><span class="n">my_axis</span><span class="p">)</span><span class="w"></span>
</pre></div>
</div>
<p>Each region already defines a <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Interval</span></a> which can be queried from the axis by</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">first_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_axis_t</span><span class="o">::</span><span class="n">get_interval</span><span class="o">&lt;</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
<span class="k">using</span><span class="w"> </span><span class="n">second_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_axis_t</span><span class="o">::</span><span class="n">get_interval</span><span class="o">&lt;</span><span class="mi">1</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
<span class="k">using</span><span class="w"> </span><span class="n">full_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_axis_t</span><span class="o">::</span><span class="n">full_interval</span><span class="p">;</span><span class="w"></span>
<span class="p">...</span><span class="w"></span>
</pre></div>
</div>
<p>Note that the <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Intervals</span></a> are compile time object, i.e. C++ types. These <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Intervals</span></a>
are used for defining which <a class="reference internal" href="../glossary/glossary.html#term-Apply-Method"><span class="xref std std-term">Apply-Method</span></a> version of the stencil operator should be used during the iteration.</p>
<p><a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Interval</span></a> provides meta-functions which allow to define modified <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Intervals</span></a></p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">interval::first_level</span></code>, which is the Interval (a C++ type) describing the first level of the Interval</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">interval::last_level</span></code>, which is Interval describing the last level of the Interval</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">interval::modify&lt;begin,</span> <span class="pre">end&gt;</span></code>, which is an Interval extended (<code class="docutils literal notranslate"><span class="pre">begin</span></code> &lt; 0) or shrunk (<code class="docutils literal notranslate"><span class="pre">begin</span></code> &gt; 0) at the
beginning of the Interval and extended (<code class="docutils literal notranslate"><span class="pre">end</span></code> &gt; 0) or shrunk (<code class="docutils literal notranslate"><span class="pre">end</span></code> &lt; 0) at the end of the Interval.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">interval::shift&lt;value&gt;</span></code>, which is the Interval shifted by <code class="docutils literal notranslate"><span class="pre">value</span></code>, i.e. it is a shortcut for <code class="docutils literal notranslate"><span class="pre">modify&lt;value,</span>
<span class="pre">value&gt;</span></code>.</p></li>
</ul>
<p>Examples:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">axis_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">axis</span><span class="o">&lt;</span><span class="mi">2</span><span class="o">&gt;</span><span class="p">;</span><span class="w"> </span><span class="c1">// axis with 2 vertical regions</span>
<span class="n">axis_t</span><span class="w"> </span><span class="nf">my_axis</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">);</span><span class="w"> </span><span class="c1">// iteration space spans 5 + 10 levels</span>

<span class="k">using</span><span class="w"> </span><span class="n">first_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">axis_t</span><span class="o">::</span><span class="n">get_interval</span><span class="o">&lt;</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">;</span><span class="w">         </span><span class="c1">// interval [0, 4]</span>
<span class="k">using</span><span class="w"> </span><span class="n">second_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">axis_t</span><span class="o">::</span><span class="n">get_interval</span><span class="o">&lt;</span><span class="mi">1</span><span class="o">&gt;</span><span class="p">;</span><span class="w">        </span><span class="c1">// [5, 14]</span>
<span class="k">using</span><span class="w"> </span><span class="n">full_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">my_axis_t</span><span class="o">::</span><span class="n">full_interval</span><span class="p">;</span><span class="w">         </span><span class="c1">// [0, 14]</span>

<span class="k">using</span><span class="w"> </span><span class="n">first_level_only_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">full_interval</span><span class="o">::</span><span class="n">first_level</span><span class="p">;</span><span class="w"> </span><span class="c1">// [0]</span>
<span class="k">using</span><span class="w"> </span><span class="n">last_level_only_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">full_interval</span><span class="o">::</span><span class="n">last_level</span><span class="p">;</span><span class="w">   </span><span class="c1">// [14]</span>
<span class="k">using</span><span class="w"> </span><span class="n">a_middle_level_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">second_interval</span><span class="o">::</span><span class="n">first_level</span><span class="p">;</span><span class="w"> </span><span class="c1">// [5]</span>

<span class="k">using</span><span class="w"> </span><span class="n">a_middle_interval</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">a_middle_level_interval</span><span class="o">::</span><span class="n">modify</span><span class="o">&lt;</span><span class="mi">-1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="o">&gt;</span><span class="p">;</span><span class="w"> </span><span class="c1">// [4, 5]</span>
<span class="k">using</span><span class="w"> </span><span class="n">a_middle_interval2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">a_middle_interval</span><span class="o">::</span><span class="n">shift</span><span class="o">&lt;</span><span class="mi">1</span><span class="o">&gt;</span><span class="p">;</span><span class="w">           </span><span class="c1">// [5, 6]</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Only two levels around a vertical region can be addressed in this way. This can be changed by using the method
described in the <a class="reference internal" href="#vertical-regions-advanced"><span class="std std-ref">next section</span></a>.</p>
</div>
</section>
<section id="advanced-functionality-for-vertical-intervals">
<span id="vertical-regions-advanced"></span><h4>Advanced Functionality for Vertical Intervals<a class="headerlink" href="#advanced-functionality-for-vertical-intervals" title="Permalink to this heading"></a></h4>
<p>The <code class="docutils literal notranslate"><span class="pre">axis</span></code> type has an additional template parameter to change a default restriction:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="kt">size_t</span><span class="w"> </span><span class="n">NIntervals</span><span class="p">,</span><span class="w"> </span><span class="n">axis_config</span><span class="o">::</span><span class="n">offset_limit</span><span class="o">&lt;</span><span class="kt">int</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;&gt;</span><span class="w"> </span><span class="k">class</span><span class="w"> </span><span class="nc">axis</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">offset_limit</span></code> defines how many levels around each vertical region can be addressed by specialized
<code class="docutils literal notranslate"><span class="pre">Apply</span></code>-methods. Increasing this value could have negative implications on compile-time.</p></li>
</ul>
</section>
</section>
<section id="run-computation-driver">
<h3><strong>run</strong>: Computation Driver<a class="headerlink" href="#run-computation-driver" title="Permalink to this heading"></a></h3>
<p><cite>GridTools</cite> provides the <code class="docutils literal notranslate"><span class="pre">gridtools::run</span></code> function to execute a stencil computation. It has the following signature:</p>
<dl class="cpp function">
<dt class="sig sig-object cpp" id="_CPPv43run13specification7backend4gridDp6fields">
<span id="_CPPv33run13specification7backend4gridDp6fields"></span><span id="_CPPv23run13specification7backend4gridDp6fields"></span><span id="run__specification.backend.grid.fieldsDp"></span><span class="kt"><span class="pre">void</span></span><span class="w"> </span><span class="sig-name descname"><span class="n"><span class="pre">run</span></span></span><span class="sig-paren">(</span><span class="n"><span class="pre">specification</span></span>, <span class="n"><span class="pre">backend</span></span>, <span class="n"><span class="pre">grid</span></span>, <span class="n"><span class="pre">fields</span></span><span class="p"><span class="pre">...</span></span><span class="sig-paren">)</span><a class="headerlink" href="#_CPPv43run13specification7backend4gridDp6fields" title="Permalink to this definition"></a><br /></dt>
<dd></dd></dl>

<p>The types of parameters are not specified on purpose: none of the types that <code class="docutils literal notranslate"><span class="pre">run</span></code> accepts are part of the public API.
The parameters have the following meaning:</p>
<blockquote>
<div><ul class="simple">
<li><p><strong>specification</strong> describes the stencil composition. Even thought technically it is a callable object, it never
gets called. It is better to think about this parameter as a micro script that is written in <cite>GridTools</cite>’s DSL.</p></li>
<li><p><strong>backend</strong> specifies the <a class="reference internal" href="../glossary/glossary.html#term-Backend"><span class="xref std std-term">Backend</span></a> that is used to execute the computation.</p></li>
<li><p><strong>grid</strong> defines the computation space</p></li>
<li><p><strong>fields …</strong> – the storages on which the computation is performed. The number of the fields is defined by
the <code class="docutils literal notranslate"><span class="pre">specification</span></code> parameter. The storage types have to model the SID concept.</p></li>
</ul>
</div></blockquote>
</section>
<section id="stencil-composition-specification">
<h3>Stencil Composition Specification<a class="headerlink" href="#stencil-composition-specification" title="Permalink to this heading"></a></h3>
<p>Informally the first parameter of the <code class="docutils literal notranslate"><span class="pre">run</span></code> should be a generic lambda function, where its arguments represent
the fields that participate in the computation. Please mind the terminology, the specification arguments are not fields,
they just represent them. They serve as place holders. The only thing you can do with those arguments is passing them to
the <cite>GridTools</cite> syntax constructs described below.</p>
<p>The temporary fields (that are managed by <cite>GridTools</cite>) should be declared with the lambda by using <code class="docutils literal notranslate"><span class="pre">GT_DECLARE_TMP</span></code>.
It accepts the type of the element as a first parameter. Other parameters are the names of the temporaries.</p>
<p>The specification (this generic lambda) returns a single C++ expression split by dots into <cite>GridTools</cite> syntax clauses.
The first clause always specifies an execution model. The next zero or more clauses describe software caches. Other
clauses correspond to computation stages.</p>
<p>The following example demonstrates how to create a specification for a diffusion operator.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">spec</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[](</span><span class="k">auto</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">coeff</span><span class="p">,</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">out</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="cp">GT_DECLARE_TMP</span><span class="p">(</span><span class="kt">double</span><span class="p">,</span><span class="w"> </span><span class="n">lap</span><span class="p">,</span><span class="w"> </span><span class="n">flx</span><span class="p">,</span><span class="w"> </span><span class="n">fly</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">return</span><span class="w"> </span><span class="nl">execute_parallel</span><span class="p">()</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">ij_cached</span><span class="p">(</span><span class="n">lap</span><span class="p">,</span><span class="w"> </span><span class="n">flx</span><span class="p">,</span><span class="w"> </span><span class="n">fly</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">stage</span><span class="p">(</span><span class="n">lap_function</span><span class="p">(),</span><span class="w"> </span><span class="n">lap</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">stage</span><span class="p">(</span><span class="n">flx_function</span><span class="p">(),</span><span class="w"> </span><span class="n">flx</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="n">lap</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">stage</span><span class="p">(</span><span class="n">fly_function</span><span class="p">(),</span><span class="w"> </span><span class="n">fly</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="n">lap</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">stage</span><span class="p">(</span><span class="n">out_function</span><span class="p">(),</span><span class="w"> </span><span class="n">out</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="n">flx</span><span class="p">,</span><span class="w"> </span><span class="n">fly</span><span class="p">,</span><span class="w"> </span><span class="n">coeff</span><span class="p">);</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
</pre></div>
</div>
<p>This specification can be executed as follows:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="nl">run</span><span class="p">(</span><span class="n">spec</span><span class="p">,</span><span class="w"> </span><span class="n">backend_t</span><span class="p">(),</span><span class="w"> </span><span class="n">grid</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="n">coeff</span><span class="p">,</span><span class="w"> </span><span class="n">out</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<section id="the-notion-of-a-stage">
<span id="stage"></span><h4>The Notion of a Stage<a class="headerlink" href="#the-notion-of-a-stage" title="Permalink to this heading"></a></h4>
<p>The main component of <cite>GridTools</cite> provides the capability of composing different <a class="reference internal" href="../glossary/glossary.html#term-Stage"><span class="xref std std-term">Stages</span></a>. A stage is the
application of a single <a class="reference internal" href="../glossary/glossary.html#term-Stencil-Operator"><span class="xref std std-term">Stencil Operator</span></a> to an <a class="reference internal" href="../glossary/glossary.html#term-Iteration-Space"><span class="xref std std-term">Iteration Space</span></a>. The ability to fuse multiple stages
allows the <cite>GridTools</cite> library to improve the memory locality of the computation by taking advantage of the produce consumer
relations.</p>
<p>A stage is specified by indicating a <a class="reference internal" href="../glossary/glossary.html#term-Stencil-Operator"><span class="xref std std-term">Stencil Operator</span></a> and some
<a class="reference internal" href="../glossary/glossary.html#term-Placeholder"><span class="xref std std-term">Placeholders</span></a> to its arguments. The placeholders are
either parameters of the specification lambda function or temporary placeholders declared by <code class="docutils literal notranslate"><span class="pre">GT_DECLARE_TMP</span></code>.
The syntax is the following:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="p">...</span><span class="w"> </span><span class="p">.</span><span class="n">stage</span><span class="p">(</span><span class="k">operator</span><span class="p">(),</span><span class="w"> </span><span class="n">plc0</span><span class="p">,</span><span class="w"> </span><span class="n">plc1</span><span class="p">,</span><span class="w"> </span><span class="p">...)</span><span class="w"> </span><span class="p">...</span><span class="w"></span>
</pre></div>
</div>
<p>Where the <cite>operator</cite> is the stencil operator of the stage and the <cite>plc0</cite>,
<cite>plc1</cite>, … are the placeholders. The number and the intent of the placeholders depend on the <a class="reference internal" href="../glossary/glossary.html#term-Stencil-Operator"><span class="xref std std-term">Stencil Operator</span></a>.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>When composing stencils, each output data field must be written only
once. Violating this constraint results in undefined behavior.</p>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>The ability to compose stages and profiting from improved memory locality is the key feature of <cite>GridTools</cite>.
If your application
is made of a simple stencil, or stencils that cannot be
composed (see below), then <cite>GridTools</cite> might not be the right solution
for you.</p>
</div>
<p>The data-dependence analysis of <cite>GridTools</cite> will determine the data flow
and the <a class="reference internal" href="../glossary/glossary.html#term-Extent"><span class="xref std std-term">Extents</span></a> at which each data field will be accessed. This
information is then passed to the architecture specific backend for
execution.</p>
</section>
<section id="multi-pass-computations">
<h4>Multi-Pass Computations<a class="headerlink" href="#multi-pass-computations" title="Permalink to this heading"></a></h4>
<p>The stencil composition specification allows also to combine several passes with different execution orders
in a single computation. This is very useful for implementing computations that require two vertical sweeps,
one ascending and one descending.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">vertical_advection</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[](</span><span class="k">auto</span><span class="w"> </span><span class="n">utens_stage</span><span class="p">,</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">wcon</span><span class="p">,</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">u_pos</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">   </span><span class="k">return</span><span class="w"> </span><span class="nl">multi_pass</span><span class="p">(</span><span class="w"></span>
<span class="w">       </span><span class="nl">execute_forward</span><span class="p">().</span><span class="n">stage</span><span class="p">(</span><span class="n">forward_op</span><span class="p">(),</span><span class="w"> </span><span class="n">utens_stage</span><span class="p">,</span><span class="w"> </span><span class="n">wcon</span><span class="p">),</span><span class="w"></span>
<span class="w">       </span><span class="nl">execute_backward</span><span class="p">().</span><span class="n">stage</span><span class="p">(</span><span class="n">backward_op</span><span class="p">(),</span><span class="w"> </span><span class="n">utens_stage</span><span class="p">,</span><span class="w"> </span><span class="n">u_pos</span><span class="p">)</span><span class="w"></span>
<span class="w">       </span><span class="p">);</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
</pre></div>
</div>
<p>This option is particularly useful on multi-core machines with caches, since
the backend can actively pass information between the two stages thus
improving substantially the performance.</p>
</section>
</section>
<section id="selecting-the-backend">
<span id="backend-selection"></span><h3>Selecting the Backend<a class="headerlink" href="#selecting-the-backend" title="Permalink to this heading"></a></h3>
<p>One of the key concepts of <cite>GridTools</cite> is portability between different target architectures.
Stencil operators are written and composed in an architecture-independent way and then instantiated
for a given <code class="docutils literal notranslate"><span class="pre">backend</span></code>. The <code class="docutils literal notranslate"><span class="pre">backend</span></code> is a tag type with with the following possible values:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">stencil::gpu&lt;&gt;</span></code>: a GPU-enabled backend for NVIDIA GPUs</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">stencil::cpu_ifirst&lt;&gt;</span></code>: a backend for modern CPUs with long vector-length.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">stencil::cpu_kfirst&lt;&gt;</span></code>: a legacy CPU-backend with focus on caching of vertical stencils, likely to be removed in the future.</p></li>
</ul>
<p>Currently we recommend one of the following two backends for optimal performance</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">backend_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">stencil</span><span class="o">::</span><span class="n">gpu</span><span class="o">&lt;&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>for GPUs or</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">backend_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">stencil</span><span class="o">::</span><span class="n">cpu_ifirst</span><span class="o">&lt;&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>for modern CPUs.</p>
</section>
</section>
<section id="advanced-functionality">
<h2>Advanced Functionality<a class="headerlink" href="#advanced-functionality" title="Permalink to this heading"></a></h2>
<section id="stencil-functions">
<span id="id7"></span><h3>Stencil Functions<a class="headerlink" href="#stencil-functions" title="Permalink to this heading"></a></h3>
<p>Stencil functions offer the possibility to call <a class="reference internal" href="../glossary/glossary.html#term-Stencil-Operator"><span class="xref std std-term">Stencil Operators</span></a> from other <a class="reference internal" href="../glossary/glossary.html#term-Stencil-Operator"><span class="xref std std-term">Stencil Operators</span></a>.
Two variants are available: functional calls which return a value and procedural calls with side-effect on the given arguments (<a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessors</span></a>).</p>
<section id="function-calls-call">
<h4>Function Calls: <cite>call&lt;&gt;</cite><a class="headerlink" href="#function-calls-call" title="Permalink to this heading"></a></h4>
<p>The basic syntax for function calls is as follows</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">call</span><span class="o">&lt;</span><span class="n">stencil_operator</span><span class="p">,</span><span class="w"> </span><span class="n">vertical_interval</span><span class="p">,</span><span class="w"> </span><span class="n">return_value_type</span><span class="o">&gt;::</span><span class="k">with</span><span class="p">(</span><span class="n">eval</span><span class="p">,</span><span class="w"> </span><span class="n">accessors</span><span class="p">...);</span><span class="w"></span>
</pre></div>
</div>
<p>Let us describe the template parameters first:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">stencil_operator</span></code> is the operator to be called.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">vertical_interval</span></code> is the <a class="reference internal" href="../glossary/glossary.html#term-Vertical-Interval"><span class="xref std std-term">Vertical Interval</span></a> where the operator will be applied (it can be empty, if the
stencil operator has an apply method without a vertical region).</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">return_value_type</span></code> is the type of the return value for the function call. The <code class="docutils literal notranslate"><span class="pre">return_value_type</span></code> will be
automatically deduced from the first <code class="docutils literal notranslate"><span class="pre">accessor</span></code> if not specified explicitly.</p></li>
</ul>
<p>The context object <code class="docutils literal notranslate"><span class="pre">eval</span></code> has to be passed as the first argument to <code class="docutils literal notranslate"><span class="pre">with</span></code>, followed by the
<a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessors</span></a> which are arguments of the operator.</p>
<p>Note that the first accessor in the stencil operator must be an artificial accessor of type <code class="docutils literal notranslate"><span class="pre">inout_accessor</span></code>. This
argument must not be passed to the function, but instead it is the return value of the function.</p>
<p>Example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">struct</span><span class="w"> </span><span class="nc">lap_function</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">out</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">inout_accessor</span><span class="o">&lt;</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">;</span><span class="w"> </span><span class="c1">// artificial first accessor</span>
<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="n">in</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">in_accessor</span><span class="o">&lt;</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="k">extent</span><span class="o">&lt;</span><span class="mi">-1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">-1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="o">&gt;&gt;</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="k">using</span><span class="w"> </span><span class="k">param_list</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">make_param_list</span><span class="o">&lt;</span><span class="n">out</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>

<span class="w">    </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">Evaluation</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">    </span><span class="cp">GT_FUNCTION</span><span class="w"> </span><span class="k">static</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="n">apply</span><span class="p">(</span><span class="n">Evaluation</span><span class="w"> </span><span class="n">eval</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">        </span><span class="n">eval</span><span class="p">(</span><span class="n">out</span><span class="p">())</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mf">4.</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">())</span><span class="w"> </span><span class="o">-</span><span class="w"></span>
<span class="w">            </span><span class="p">(</span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">-1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">))</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">-1</span><span class="p">))</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">))</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">eval</span><span class="p">(</span><span class="n">in</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">)));</span><span class="w"></span>
<span class="w">    </span><span class="p">}</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>

<span class="c1">// later:</span>
<span class="k">auto</span><span class="w"> </span><span class="n">ret</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">call</span><span class="o">&lt;</span><span class="n">lap_function</span><span class="o">&gt;::</span><span class="k">with</span><span class="p">(</span><span class="n">eval</span><span class="p">,</span><span class="w"> </span><span class="n">param</span><span class="p">());</span><span class="w"> </span><span class="c1">// only one parameter</span>
</pre></div>
</div>
<p>This function calculates the laplacian of a field. Note that the function is called only with one parameter, because the
first accessor (<code class="docutils literal notranslate"><span class="pre">out</span></code>) is the artificial accessor representing the return value.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If you pass a vertical interval to <code class="docutils literal notranslate"><span class="pre">call</span></code>, a matching apply method needs to exist in the called operator.</p>
</div>
<p>The <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessors</span></a> which are passed in the function call can have offsets in the usual way. Additionally the whole operator can be shifted to be executed
on a different grid point, by specifying a relative location with <code class="docutils literal notranslate"><span class="pre">at</span></code>:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">call</span><span class="o">&lt;</span><span class="p">...</span><span class="o">&gt;::</span><span class="k">at</span><span class="o">&lt;</span><span class="n">offset_i</span><span class="p">,</span><span class="w"> </span><span class="n">offset_j</span><span class="p">,</span><span class="w"> </span><span class="n">offset_k</span><span class="o">&gt;::</span><span class="k">with</span><span class="p">(...);</span><span class="w"></span>
</pre></div>
</div>
<p>For example, you can evaluate the laplacian at the next k-level as follows:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">call</span><span class="o">&lt;</span><span class="n">lap_function</span><span class="o">&gt;::</span><span class="k">at</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="o">&gt;::</span><span class="k">with</span><span class="p">(</span><span class="n">eval</span><span class="p">,</span><span class="w"> </span><span class="n">param</span><span class="p">());</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="procedure-calls-call-proc">
<h4>Procedure Calls: <cite>call_proc&lt;&gt;</cite><a class="headerlink" href="#procedure-calls-call-proc" title="Permalink to this heading"></a></h4>
<p>Procedural calls work in the same way as function calls, but all <a class="reference internal" href="../glossary/glossary.html#term-Accessor"><span class="xref std std-term">Accessor</span></a>, which are <code class="docutils literal notranslate"><span class="pre">inout</span></code> in the operator,
can be modified.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">call_proc</span><span class="o">&lt;</span><span class="n">stencil_operator</span><span class="p">,</span><span class="w"> </span><span class="n">vertical_interval</span><span class="o">&gt;::</span><span class="k">with</span><span class="p">(</span><span class="n">eval</span><span class="p">,</span><span class="w"> </span><span class="n">accessors</span><span class="p">...);</span><span class="w"></span>
</pre></div>
</div>
<p>Also offsets work the same way as for function calls. Using <code class="docutils literal notranslate"><span class="pre">at</span></code> with <code class="docutils literal notranslate"><span class="pre">call_proc</span></code> is not recommended and support
might be dropped in the future.</p>
<p>You can call the laplacian above with the following syntax:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">call_proc</span><span class="o">&lt;</span><span class="n">lap_function</span><span class="o">&gt;::</span><span class="k">with</span><span class="p">(</span><span class="n">eval</span><span class="p">,</span><span class="w"> </span><span class="n">lap</span><span class="p">(),</span><span class="w"> </span><span class="n">param</span><span class="p">());</span><span class="w"></span>
</pre></div>
</div>
</section>
</section>
<section id="software-managed-caches">
<span id="caches"></span><h3>Software-Managed Caches<a class="headerlink" href="#software-managed-caches" title="Permalink to this heading"></a></h3>
<p><a class="reference internal" href="../glossary/glossary.html#term-Software-Managed-Cache"><span class="xref std std-term">Software-Managed Caches</span></a> are syntax elements that are used
to describe data reuse patterns of the stencil computations.
They are an essential functionality of <cite>GridTools</cite> in order
to deliver an efficient implementation of memory bound codes.
The library uses
the cache annotations to allocate cached fields in a fast on-chip
scratch-pad memory.</p>
<p>While the library is capable of exploiting several on-chip memory layers
(like texture cache, const cache, shared memory, and registers of NVIDIA GPUs)
the <cite>GridTools</cite> language is abstracting these underlying memory layers and
exposes syntax elements that are architecture agnostic.</p>
<p>Therefore the <a class="reference internal" href="../glossary/glossary.html#term-Software-Managed-Cache"><span class="xref std std-term">Software-Managed Cache</span></a> syntax should be used by the
user to describe <em>only</em> data reuse patterns, and not the type of
on-chip memory that should be exploited (which is a decision delegated to
the backend of the library).</p>
<p>An example of the syntax for caching certain fields of a
computation is shown below</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">spec</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[](</span><span class="k">auto</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="k">auto</span><span class="w"> </span><span class="n">out</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="cp">GT_DECLARE_TMP</span><span class="p">(</span><span class="kt">double</span><span class="p">,</span><span class="w"> </span><span class="n">f1</span><span class="p">,</span><span class="w"> </span><span class="n">f2</span><span class="p">);</span><span class="w"></span>
<span class="w">    </span><span class="k">return</span><span class="w"> </span><span class="nl">execute_parallel</span><span class="p">()</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">ij_cached</span><span class="p">(</span><span class="n">f1</span><span class="p">,</span><span class="w"> </span><span class="n">f2</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">stage</span><span class="p">(</span><span class="n">lap_function</span><span class="p">(),</span><span class="w"> </span><span class="n">f1</span><span class="p">,</span><span class="w"> </span><span class="n">f2</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">)</span><span class="w"></span>
<span class="w">        </span><span class="p">.</span><span class="n">stage</span><span class="p">(</span><span class="n">lap_function</span><span class="p">(),</span><span class="w"> </span><span class="n">out</span><span class="p">,</span><span class="w"> </span><span class="n">f1</span><span class="p">,</span><span class="w"> </span><span class="n">f2</span><span class="p">);</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
<span class="nl">run</span><span class="p">(</span><span class="n">spec</span><span class="p">,</span><span class="w"> </span><span class="n">backend_t</span><span class="p">(),</span><span class="w"> </span><span class="n">grid</span><span class="p">,</span><span class="w"> </span><span class="n">in</span><span class="p">,</span><span class="w"> </span><span class="n">out</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a> DSL elements are enclosed into a <code class="docutils literal notranslate"><span class="pre">ij_cached</span></code> construct,
that accept any number of <code class="docutils literal notranslate"><span class="pre">cache</span></code> constructs. At the same time, each
<code class="docutils literal notranslate"><span class="pre">cache</span></code> construct can specify multiple fields that shared the same
access pattern.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>It is important to note that the <code class="docutils literal notranslate"><span class="pre">cache</span></code> specifications
are prescribing the behavior of the library: if a <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a>
is specified (and the backend supports caching), a <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a> will be used. In the (rare) case of
using too many <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Caches</span></a> a decrease in performance might be
observed due to saturation of available resources, or, in the worst case, the computation might error at
run-time.</p>
</div>
<p>The <code class="docutils literal notranslate"><span class="pre">cache</span></code> construct adheres to the following syntax:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="p">...</span><span class="w"> </span><span class="p">.</span><span class="n">ij_cached</span><span class="p">(</span><span class="n">placeholders</span><span class="p">...)</span><span class="w"> </span><span class="p">...</span><span class="w"></span>
<span class="p">...</span><span class="w"> </span><span class="p">.</span><span class="n">k_cached</span><span class="p">(</span><span class="n">placeholders</span><span class="p">...)</span><span class="w"> </span><span class="p">...</span><span class="w"></span>
<span class="p">...</span><span class="w"> </span><span class="p">.</span><span class="n">k_cached</span><span class="p">(</span><span class="nn">cache_io_policy</span><span class="o">::</span><span class="k">fill</span><span class="p">,</span><span class="w"> </span><span class="n">placeholders</span><span class="p">...)</span><span class="w"> </span><span class="p">...</span><span class="w"></span>
<span class="p">...</span><span class="w"> </span><span class="p">.</span><span class="n">k_cached</span><span class="p">(</span><span class="nn">cache_io_policy</span><span class="o">::</span><span class="k">flush</span><span class="p">,</span><span class="w"> </span><span class="n">placeholders</span><span class="p">...)</span><span class="w"> </span><span class="p">...</span><span class="w"></span>
<span class="p">...</span><span class="w"> </span><span class="p">.</span><span class="n">k_cached</span><span class="p">(</span><span class="nn">cache_io_policy</span><span class="o">::</span><span class="k">fill</span><span class="p">,</span><span class="w"> </span><span class="nn">cache_io_policy</span><span class="o">::</span><span class="k">flush</span><span class="p">,</span><span class="w"> </span><span class="n">placeholders</span><span class="p">...)</span><span class="w"> </span><span class="p">...</span><span class="w"></span>
</pre></div>
</div>
<p>where <code class="docutils literal notranslate"><span class="pre">p_args...</span></code> is a list of placeholders for which the specified caching
should be used.
Full examples on <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a> usages can be found in the
<a class="reference external" href="https://github.com/GridTools/gridtools/blob/master/examples/stencil/horizontal_diffusion_limited.cpp">horizontal diffusion</a>
and
<a class="reference external" href="https://github.com/GridTools/gridtools/blob/master/tests/regression/vertical_advection_dycore.cpp">vertical advection</a> examples.</p>
<p>We now describe the details of each construct.</p>
<section id="cache-type">
<span id="id8"></span><h4>Cache Type<a class="headerlink" href="#cache-type" title="Permalink to this heading"></a></h4>
<p>The <code class="docutils literal notranslate"><span class="pre">cache_type</span></code> describes the type of access pattern present in our stencil for the field being cached. It’s
value can be one of the following (where we indicate the basic mean of implementation on the GPUs, so that the user can understand the amount of resources involved):</p>
<ol class="arabic simple">
<li><p><code class="docutils literal notranslate"><span class="pre">ij_cached</span></code>: cache data fields whose access pattern lies in the ij-plane, i.e. only offsets of the type <cite>i ±
X</cite> or <cite>j ± Y</cite> are allowed (the GPU backend will cache these fields in shared memory). It is undefined behaviour to
access data with k-offsets.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">k_cached</span></code>: cache data fields whose access pattern is restricted to the k-direction, i.e. only offsets of the
type <cite>k ± Z</cite> (the GPU backend will cache these fields in registers). It is undefined behaviour to access data with
offsets in i or j direction.</p></li>
</ol>
</section>
<section id="cache-policy">
<span id="id9"></span><h4>Cache Policy<a class="headerlink" href="#cache-policy" title="Permalink to this heading"></a></h4>
<p>The <code class="docutils literal notranslate"><span class="pre">cache_policy</span></code> specifies a synchronization policy between the data in the <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a> and the data in main memory. A scratch-pad can be used
in order to allocate temporary computations that do not require data persistency across multiple stencils. However often the data that is
being cached is already present in main memory fields. In this case, the <a class="reference internal" href="../glossary/glossary.html#term-Software-Managed-Cache"><span class="xref std std-term">Software-Managed Caches</span></a> of <cite>GridTools</cite> gives the possibility
to specify a cache policy that allows to synchronize the main memory with the cached field.
The possible values are:</p>
<ol class="arabic simple">
<li><p><code class="docutils literal notranslate"><span class="pre">cache_io_policy::fill</span></code>: fill the scratch-pad buffer with data from main memory field before use.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">cache_io_policy::flush</span></code>: After the execution of the stencil operators the data in the <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a> is written back into the main memory fields.</p></li>
</ol>
<blockquote>
<div><p><a class="reference internal" href="#fig-kcache-ex"><span class="std std-numref">Fig. 3</span></a> graphically depicts an example of all the ordered operations that are executed when a <code class="docutils literal notranslate"><span class="pre">fill_and_flush</span></code>
<a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a> is used in a forward vertical loop.</p>
</div></blockquote>
<figure class="align-default" id="id20">
<span id="fig-kcache-ex"></span><a class="reference internal image-reference" href="../_images/kcache_ex.png"><img alt="../_images/kcache_ex.png" src="../_images/kcache_ex.png" style="width: 759.5px; height: 420.5px;" /></a>
<figcaption>
<p><span class="caption-number">Fig. 3 </span><span class="caption-text">Representation of an implementation for a <code class="docutils literal notranslate"><span class="pre">cache&lt;cache_type::k,</span> <span class="pre">cache_io_policy::fill_and_flush&gt;</span></code> that is used within a
stencil with <a class="reference internal" href="../glossary/glossary.html#term-Extent"><span class="xref std std-term">Extent</span></a> <code class="docutils literal notranslate"><span class="pre">&lt;-2,</span> <span class="pre">1&gt;</span></code> in the vertical dimension and implemented as a ring-buffer with 4 levels (in order to allocate all possible offsetted accesses). The three operations
are triggered automatically by the library for a <cite>fill_and_flush</cite> <a class="reference internal" href="../glossary/glossary.html#term-Cache"><span class="xref std std-term">Cache</span></a> when the vertical loop transition from level 9 to level 10.</span><a class="headerlink" href="#id20" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
</section>
<section id="expandable-parameters">
<span id="id10"></span><h3>Expandable Parameters<a class="headerlink" href="#expandable-parameters" title="Permalink to this heading"></a></h3>
<p>Expandable parameters implement a “single stencil multiple storages” pattern.
They are useful when we have a vector of storages which have the same
shape, and we want to perform the same operation with all of them
(a typical situation when implementing e.g. time differentiation schemes).
Normally this could be achieved by creating a loop and running multiple computations,
but this solution would be inefficient. A more efficient solution is provided
through the expandable parameters API.</p>
<p>The user must collect the storages in a <code class="docutils literal notranslate"><span class="pre">std::vector</span></code></p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="n">storage_t</span><span class="o">&gt;</span><span class="w"> </span><span class="n">list</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="n">storage1</span><span class="p">,</span><span class="w"> </span><span class="n">storage2</span><span class="p">,</span><span class="w"> </span><span class="n">storage3</span><span class="p">,</span><span class="w"> </span><span class="n">storage4</span><span class="p">,</span><span class="w"> </span><span class="n">storage5</span><span class="p">,</span><span class="w"> </span><span class="n">storage6</span><span class="p">,</span><span class="w"> </span><span class="n">storage7</span><span class="p">,</span><span class="w"> </span><span class="n">storage8</span><span class="p">};</span><span class="w"></span>
</pre></div>
</div>
<p>This <code class="docutils literal notranslate"><span class="pre">std::vector</span></code> is then used as a storage type with no differences with respect to
the regular storages.</p>
<p>The implementation requires the user to specify an integer <code class="docutils literal notranslate"><span class="pre">expand_factor</span></code> when defining the computation.
The optimal value for <code class="docutils literal notranslate"><span class="pre">expand_factor</span></code> might be backend-dependent.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="n">spec</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[](</span><span class="k">auto</span><span class="w"> </span><span class="n">s</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">  </span><span class="k">return</span><span class="w"> </span><span class="nl">execute_parallel</span><span class="p">().</span><span class="n">stage</span><span class="p">(</span><span class="n">functor</span><span class="p">(),</span><span class="w"> </span><span class="n">s</span><span class="p">);</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
<span class="nl">expandable_run</span><span class="o">&lt;</span><span class="mi">4</span><span class="o">&gt;</span><span class="p">(</span><span class="n">spec</span><span class="p">,</span><span class="w"> </span><span class="n">backend_t</span><span class="p">(),</span><span class="w"> </span><span class="n">grid</span><span class="p">,</span><span class="w"> </span><span class="n">s</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The vector of
storages is then partitioned into chunks of <code class="docutils literal notranslate"><span class="pre">expand_factor</span></code> size (with a remainder). Each
chunk is unrolled within a computation, and for each chunk a different computation is
instantiated. The remainder elements are then processed one by one.</p>
<p>Summing up, the only differences with respect to the case without expandable parameters are:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">expandable_run</span></code> has to be used instead of <code class="docutils literal notranslate"><span class="pre">run</span></code></p></li>
<li><p>an <code class="docutils literal notranslate"><span class="pre">expand_factor</span></code> has to be passed to the <code class="docutils literal notranslate"><span class="pre">expandable_run</span></code>, defining the size of the chunks of</p></li>
<li><p>expandable parameters should be unrolled in each computation.</p></li>
<li><p>a <code class="docutils literal notranslate"><span class="pre">std::vector</span></code> of storage pointers has to be used instead of a single storage.</p></li>
</ul>
<p>All the rest is managed by <cite>GridTools</cite>, so that the user is not exposed to the complexity of the
unrolling, he can reuse the code when the expand factor changes, and he can resize dynamically the expandable
parameters vector, for instance by adding or removing elements.</p>
</section>
<section id="global-parameters">
<span id="global-accessor"></span><h3>Global Parameters<a class="headerlink" href="#global-parameters" title="Permalink to this heading"></a></h3>
<p>The API allows the user to define an arbitrary object to act as a <a class="reference internal" href="../glossary/glossary.html#term-Global-Parameter"><span class="xref std std-term">Global Parameter</span></a> as long as it is trivially copyable.
To create a <a class="reference internal" href="../glossary/glossary.html#term-Global-Parameter"><span class="xref std std-term">Global Parameter</span></a> from a user-defined object, we pass the object to <code class="docutils literal notranslate"><span class="pre">global_parameter()</span></code></p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">auto</span><span class="w"> </span><span class="n">my_global_parameter</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">global_parameter</span><span class="p">(</span><span class="n">my_object</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
</section>
</section>
<section id="boundary-conditions">
<span id="id11"></span><h2>Boundary Conditions<a class="headerlink" href="#boundary-conditions" title="Permalink to this heading"></a></h2>
<section id="introduction">
<h3>Introduction<a class="headerlink" href="#introduction" title="Permalink to this heading"></a></h3>
<p>The boundary condition module in <cite>GridTools</cite> is designed following the
principle that boundary conditions can be arbitrarily complex, so we
want the user to be able to specify any boundary condition code to set
up their problems.</p>
</section>
<section id="preliminaries">
<h3>Preliminaries<a class="headerlink" href="#preliminaries" title="Permalink to this heading"></a></h3>
<p>One main concept that is needed for the boundary condition is the one
of <cite>direction</cite>.</p>
<p>In a 3D regular grid, which is where this implementation of the
boundary condition library applies, we associate a 3D axis system,
and the cell indices (i, j, k) naturally lie on it. With this axis
system the concept of “vector” can be defined to indicate
distances and directions. Direction is the one thing we need
here. Instead of using unitary vectors to indicate directions, as
it is usually the case for euclidean spaces, we use vectors whose
components are -1, 0, and 1.  For example, <span class="math notranslate nohighlight">\((1, 1, 1)\)</span> is the
direction indicated by the unit vector <span class="math notranslate nohighlight">\((1, 1, 1)/\sqrt3\)</span>.
If we take the center of a 3D grid, then we can define 26
different directions <span class="math notranslate nohighlight">\(\{(i, j, k): i, j, k \in \{-1, 0, 1\}\}\setminus\{(0, 0, 0)\}\)</span>
that identify the different faces, edges and corners of the cube to
which the grid is topologically analogous with.</p>
<p>The main idea is that a boundary condition class specializes
<cite>operator()</cite> on a <cite>direction</cite>, or a subset of directions, and then
perform the user specified computation on the boundaries on those
directions.</p>
<p>The user can define their own boundary condition classes and perform
specific computation in each direction. For this reason <cite>GridTools</cite> provides
a <code class="docutils literal notranslate"><span class="pre">direction</span></code> type which can take three direction values, that are
indicated as <code class="docutils literal notranslate"><span class="pre">minus_</span></code>, <code class="docutils literal notranslate"><span class="pre">plus_</span></code> and <code class="docutils literal notranslate"><span class="pre">zero_</span></code>, which are values of an
<code class="docutils literal notranslate"><span class="pre">enum</span></code> called <code class="docutils literal notranslate"><span class="pre">sign</span></code>.</p>
</section>
<section id="boundary-condition-class">
<span id="boundary-conditions-class"></span><h3>Boundary Condition Class<a class="headerlink" href="#boundary-condition-class" title="Permalink to this heading"></a></h3>
<p>The boundary condition class is a regular class which need to be copy
constructible, and whose member functions should be decorated with the
<code class="docutils literal notranslate"><span class="pre">GT_FUNCTION</span></code> keyword to enable accelerators. It must not contain references to data that may be not available on the target device where the boundary conditions are applied.</p>
<p>The boundary condition class provides overloads for the <code class="docutils literal notranslate"><span class="pre">operator()</span></code>
which take as first argument a <code class="docutils literal notranslate"><span class="pre">direction</span></code> object, a number of <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data
Stores</span></a> that are the inputs, and three integer values that will contains the
coordinate indices of the cell that is being iterated on.</p>
<p>All overloads must have the same number of arguments: the first argument is the direction over which the overload will be applied to, then there is the list of <a class="reference internal" href="../glossary/glossary.html#term-Data-View"><span class="xref std std-term">Data Views</span></a> that will be accessed by the boundary class, and finally three integers that contains the indices of the element being accessed in the call.
It is standard practice to let the view types be template
arguments. For instance, here a class that applies a copy-boundary
condition (copy the second view into the first one) for all direction
apart all directions for which the third component is <code class="docutils literal notranslate"><span class="pre">minus_</span></code>:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">struct</span><span class="w"> </span><span class="nc">example_bc</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">   </span><span class="kt">double</span><span class="w"> </span><span class="n">value</span><span class="p">;</span><span class="w"></span>

<span class="w">   </span><span class="cp">GT_FUNCTION</span><span class="w"></span>
<span class="w">   </span><span class="nf">example_bc</span><span class="p">(</span><span class="kt">double</span><span class="w"> </span><span class="n">v</span><span class="p">)</span><span class="w"> </span><span class="o">:</span><span class="w"> </span><span class="n">value</span><span class="p">(</span><span class="n">v</span><span class="p">)</span><span class="w"> </span><span class="p">{}</span><span class="w"></span>

<span class="w">   </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">Direction</span><span class="p">,</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">DataField0</span><span class="p">,</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">DataField1</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">   </span><span class="cp">GT_FUNCTION</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="k">operator</span><span class="p">()(</span><span class="n">Direction</span><span class="p">,</span><span class="w"></span>
<span class="w">                               </span><span class="n">DataField0</span><span class="w"> </span><span class="o">&amp;</span><span class="n">data_field0</span><span class="p">,</span><span class="w"> </span><span class="n">DataField1</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="n">data_field1</span><span class="p">,</span><span class="w"></span>
<span class="w">                               </span><span class="kt">unsigned</span><span class="w"> </span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="kt">unsigned</span><span class="w"> </span><span class="n">j</span><span class="p">,</span><span class="w"> </span><span class="kt">unsigned</span><span class="w"> </span><span class="n">k</span><span class="p">)</span><span class="w"> </span><span class="k">const</span><span class="w"></span>
<span class="w">   </span><span class="p">{</span><span class="w"></span>
<span class="w">     </span><span class="n">data_field0</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">j</span><span class="p">,</span><span class="w"> </span><span class="n">k</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">data_field1</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">j</span><span class="p">,</span><span class="w"> </span><span class="n">k</span><span class="p">);</span><span class="w"></span>
<span class="w">   </span><span class="p">}</span><span class="w"></span>

<span class="w">   </span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="n">sign</span><span class="w"> </span><span class="n">I</span><span class="p">,</span><span class="w"> </span><span class="n">sign</span><span class="w"> </span><span class="n">J</span><span class="p">,</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">DataField0</span><span class="p">,</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">DataField1</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">   </span><span class="cp">GT_FUNCTION</span><span class="w"> </span><span class="kt">void</span><span class="w"> </span><span class="k">operator</span><span class="p">()(</span><span class="n">direction</span><span class="o">&lt;</span><span class="n">I</span><span class="p">,</span><span class="w"> </span><span class="n">J</span><span class="p">,</span><span class="w"> </span><span class="n">minus_</span><span class="o">&gt;</span><span class="p">,</span><span class="w"></span>
<span class="w">                               </span><span class="n">DataField0</span><span class="w"> </span><span class="o">&amp;</span><span class="n">data_field0</span><span class="p">,</span><span class="w"> </span><span class="n">DataField1</span><span class="w"> </span><span class="k">const</span><span class="w"> </span><span class="o">&amp;</span><span class="p">,</span><span class="w"></span>
<span class="w">                               </span><span class="kt">unsigned</span><span class="w"> </span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="kt">unsigned</span><span class="w"> </span><span class="n">j</span><span class="p">,</span><span class="w">  </span><span class="kt">unsigned</span><span class="w"> </span><span class="n">k</span><span class="p">)</span><span class="w"> </span><span class="k">const</span><span class="w"></span>
<span class="w">   </span><span class="p">{</span><span class="w"></span>
<span class="w">     </span><span class="n">data_field0</span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">j</span><span class="p">,</span><span class="w"> </span><span class="n">k</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">value</span><span class="p">;</span><span class="w"></span>
<span class="w">   </span><span class="p">}</span><span class="w"></span>
<span class="p">};</span><span class="w"></span>
</pre></div>
</div>
<p><cite>operator()</cite> of the boundary class is called by the library, on the 26 directions, and got each value in the data that correspond to each direction. In the previous example, each direction in which the third component is <code class="docutils literal notranslate"><span class="pre">minus</span></code> will select the specialized overload, while all other directions select the first implementation.</p>
</section>
<section id="boundary-condition-application">
<h3>Boundary Condition Application<a class="headerlink" href="#boundary-condition-application" title="Permalink to this heading"></a></h3>
<p>To apply the above boundary conditions class to the data fields, we
need to construct the boundary object, but also to specify the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a>
regions. The <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> regions are specified using
<a class="reference internal" href="../glossary/glossary.html#term-Halo-Descriptor"><span class="xref std std-term">Halo Descriptors</span></a>.</p>
<p>To do this we need an array of <a class="reference internal" href="../glossary/glossary.html#term-Halo-Descriptor"><span class="xref std std-term">Halo Descriptors</span></a> initialized with the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> information of the data fields.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>The fifth number, namely the total length, in the <a class="reference internal" href="../glossary/glossary.html#term-Halo-Descriptor"><span class="xref std std-term">Halo
Descriptor</span></a> is not used by the boundary condition application module,
but kept to reduce the number of similar concepts.</p>
</div>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">array</span><span class="o">&lt;</span><span class="k">halo_descriptor</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="o">&gt;</span><span class="w"> </span><span class="n">halos</span><span class="p">;</span><span class="w"></span>
<span class="n">halos</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">halo_descriptor</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">d1</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="n">d1</span><span class="p">);</span><span class="w"></span>
<span class="n">halos</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">halo_descriptor</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">d2</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="n">d2</span><span class="p">);</span><span class="w"></span>
<span class="n">halos</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">halo_descriptor</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">d3</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="n">d3</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>After this is done we can apply the boundary condition by, as in this
example, constructing the boundary object and applying it to the data
fields. The number of data fields to pass is equal to the number of
fields the <code class="docutils literal notranslate"><span class="pre">operator()</span></code> overloads of the boundary class require.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="nl">boundary</span><span class="o">&lt;</span><span class="n">example_bc</span><span class="p">,</span><span class="w"> </span><span class="n">backend_t</span><span class="o">&gt;</span><span class="p">(</span><span class="n">halos</span><span class="p">,</span><span class="w"> </span><span class="n">example_bc</span><span class="p">(</span><span class="mi">42</span><span class="p">)).</span><span class="n">apply</span><span class="p">(</span><span class="n">out_s</span><span class="p">,</span><span class="w"> </span><span class="n">in_s</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>As can be noted, the <code class="docutils literal notranslate"><span class="pre">backend</span></code> is also needed to select the proper
implementation of the boundary application algorithm (see <a class="reference internal" href="../glossary/glossary.html#term-Backend"><span class="xref std std-term">Backend</span></a>). <code class="docutils literal notranslate"><span class="pre">out_s</span></code> and
<code class="docutils literal notranslate"><span class="pre">in_s</span></code> are the two data fields passed to the application. The fact
that the first is the output and second is the input derives from the
signature of the overloads of <code class="docutils literal notranslate"><span class="pre">operator()</span></code>, and it is user defined.</p>
</section>
<section id="boundary-predication">
<h3>Boundary Predication<a class="headerlink" href="#boundary-predication" title="Permalink to this heading"></a></h3>
<p>Predication is an additional feature to control the boundary
application.  The predicate type have to be specified as template
argument of the boundary class, and the instantiated object of that
type passed as third argument of the boundary class constructor, as in
the following example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="nl">boundary</span><span class="o">&lt;</span><span class="n">direction_bc_input</span><span class="o">&lt;</span><span class="n">uint_t</span><span class="o">&gt;</span><span class="p">,</span><span class="w"> </span><span class="n">backend_t</span><span class="p">,</span><span class="w"> </span><span class="n">predicate_t</span><span class="o">&gt;</span><span class="w"></span>
<span class="w">  </span><span class="p">(</span><span class="n">halos</span><span class="p">,</span><span class="w"> </span><span class="n">direction_bc_input</span><span class="o">&lt;</span><span class="n">uint_t</span><span class="o">&gt;</span><span class="p">(</span><span class="mi">42</span><span class="p">),</span><span class="w"> </span><span class="n">predicate_t</span><span class="p">{}).</span><span class="n">apply</span><span class="p">(</span><span class="n">out_s</span><span class="p">,</span><span class="w"> </span><span class="n">in_s</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The predicate must obey a fixed interface, that is, it has to accept
as argument a <code class="docutils literal notranslate"><span class="pre">direction</span></code> object, so that the user can, at runtime,
disable some <code class="docutils literal notranslate"><span class="pre">operator()</span></code> overloads. This can be very useful when
the user is running on a parallel domain decomposed domain, and only
the global boundaries need to updated with the boundary conditions
application and the rest should have their <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halos</span></a> updated from
neighbors.</p>
</section>
<section id="provided-boundary-conditions">
<span id="id12"></span><h3>Provided Boundary Conditions<a class="headerlink" href="#provided-boundary-conditions" title="Permalink to this heading"></a></h3>
<p><cite>GridTools</cite> provides few boundary application classes for some common cases. They are</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">copy_boundary</span></code> to copy the boundary of the last field of the argument list of <cite>apply</cite> into the other ones;</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">template</span> <span class="pre">&lt;class</span> <span class="pre">T&gt;</span> <span class="pre">value_boundary</span></code> to set the boundary to a value for all the data fields provided;</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">zero_boundary</span></code> to set the boundary to the default constructed value type of the data fields (usually a zero) for the input fields.</p></li>
</ul>
</section>
</section>
<section id="halo-exchanges">
<span id="id13"></span><h2>Halo Exchanges<a class="headerlink" href="#halo-exchanges" title="Permalink to this heading"></a></h2>
<section id="id14">
<h3>Introduction<a class="headerlink" href="#id14" title="Permalink to this heading"></a></h3>
<p>The communication module in <cite>GridTools</cite> is dubbed <a class="reference internal" href="../glossary/glossary.html#term-GCL"><span class="xref std std-term">GCL</span></a>. It’s a low level
halo-update interface for 3D fields that takes 3D arrays of some
types, and the descriptions of the halos, and perform the exchanges in
a scalable way.</p>
<p>It is low-level because the requirements from which it was initially
designed, required easy interoperability with C and Fortran, so the API
takes pointers and sizes. The sizes are specified by
<code class="docutils literal notranslate"><span class="pre">halo_descriptor</span></code>, which are loosely inspired by the BLAS description
of dimensions of matrices. A new, more modern set of interfaces are
being implemented, to serve more general cases, such as higher
dimensions and other grids.</p>
<p>We first start with some preliminaries and then discuss the main
interfaces.</p>
</section>
<section id="id15">
<h3>Preliminaries<a class="headerlink" href="#id15" title="Permalink to this heading"></a></h3>
<section id="processor-grid">
<h4>Processor Grid<a class="headerlink" href="#processor-grid" title="Permalink to this heading"></a></h4>
<p>The processor grid is a concept that describe a 3D lattice of
computing elements (you may think of those as MPI tasks). The
identifiers of them are tuples of indices. This naturally maps to a 3D
decomposition of a data field.</p>
</section>
<section id="layout-map">
<h4>Layout Map<a class="headerlink" href="#layout-map" title="Permalink to this heading"></a></h4>
<p>The communication layer needs two <a class="reference internal" href="../glossary/glossary.html#term-Layout-Map"><span class="xref std std-term">Layout Maps</span></a>:
one for describing the data, and one for the
processor grid. For the user, the dimensions of the data are always
indicated as first, second, and third (or i, j, k), it is the
<a class="reference internal" href="../glossary/glossary.html#term-Layout-Map"><span class="xref std std-term">Layout Map</span></a> that indicates the stride orders, as in the following example:</p>
<p>For instance:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="c1">//         i, j, k</span>
<span class="k">layout_map</span><span class="o">&lt;</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="w"></span>
</pre></div>
</div>
<p>This <a class="reference internal" href="../glossary/glossary.html#term-Layout-Map"><span class="xref std std-term">Layout Map</span></a> indicates that the first dimension in the data (i) is the
second in the decreasing stride order, while the second (j) has the
biggest stride, and last dimension (k) is the one with stride 1. The
largest strides are associated to smaller indices, so that
<code class="docutils literal notranslate"><span class="pre">layout_map&lt;0,</span> <span class="pre">1,</span> <span class="pre">2&gt;</span></code> corresponds to a C-layout, while
<code class="docutils literal notranslate"><span class="pre">layout_map&lt;2,</span> <span class="pre">1,</span> <span class="pre">0&gt;</span></code> to a Fortran layout.</p>
<p>The second template <a class="reference internal" href="../glossary/glossary.html#term-Layout-Map"><span class="xref std std-term">Layout Map</span></a> in the <a class="reference internal" href="../glossary/glossary.html#term-Halo-Exchange"><span class="xref std std-term">Halo Exchange</span></a>
pattern is the map between data coordinates and the processor
grid coordinates.</p>
<p>The following layout specification</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">layout_map</span><span class="o">&lt;</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="w"></span>
</pre></div>
</div>
<p>would mean: The first dimension in data matches with the second
dimension of the computing grid, the second dimension of the data to
the first of the processing grid, and the third one to the third
one. This is rarely different from <code class="docutils literal notranslate"><span class="pre">layout_map&lt;0,</span> <span class="pre">1,</span> <span class="pre">2&gt;</span></code>, so it can
generally be ignored, but we give an example to clarify its meaning.</p>
<p>Suppose the processor grid (domain decomposition sizes) has size PIxPJx1. Now, we want
to say that the first dimension on data ‘extends’ to the
computing grid on (or that the first dimension in the data corresponds
to) the first dimension in the computing grid. Let’s consider a 2x1
process grid, and the first dimension of the data being the rows (i)
and the second the column (j). In this case we are assuming a
distribution like in <a class="reference internal" href="#fig-dist1"><span class="std std-numref">Fig. 4</span></a>.</p>
<figure class="align-default" id="id21">
<span id="fig-dist1"></span><a class="reference internal image-reference" href="../_images/dist1.png"><img alt="../_images/dist1.png" src="../_images/dist1.png" style="width: 624.4000000000001px; height: 405.20000000000005px;" /></a>
<figcaption>
<p><span class="caption-number">Fig. 4 </span><span class="caption-text">Example data distribution among two processes.</span><a class="headerlink" href="#id21" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>In this case the map between data and the processor grid is:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">layout_map</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="w"> </span><span class="c1">// The 3rd dimension stride is 1</span>
</pre></div>
</div>
<p>On the other hand, having specified</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">layout_map</span><span class="o">&lt;</span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="w"></span>
</pre></div>
</div>
<p>for this map, would imply a layout/distribution like the following <a class="reference internal" href="#fig-dist2"><span class="std std-numref">Fig. 5</span></a>.</p>
<figure class="align-default" id="id22">
<span id="fig-dist2"></span><a class="reference internal image-reference" href="../_images/dist2.png"><img alt="../_images/dist2.png" src="../_images/dist2.png" style="width: 447.6px; height: 464.0px;" /></a>
<figcaption>
<p><span class="caption-number">Fig. 5 </span><span class="caption-text">Example data distribution among two processes.</span><a class="headerlink" href="#id22" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>Where the second dimension in the data correspond to the fist
dimension in the processor grid. Again, the data coordinates
ordering is the one the user choose to be the logical order in the
application, not the increasing stride order.</p>
</section>
<section id="halo-descriptor">
<span id="id16"></span><h4>Halo Descriptor<a class="headerlink" href="#halo-descriptor" title="Permalink to this heading"></a></h4>
<p>Given  a  dimension of  the  data  (array), the  communication  module
requires the user to describe it using the <code class="docutils literal notranslate"><span class="pre">halo_descriptor</span></code> class,
which takes five integers. This class identifies the data that needs
to be exchanged.</p>
<p>Consider a dimension which has <code class="docutils literal notranslate"><span class="pre">minus</span></code> halo lines on one side, and
<code class="docutils literal notranslate"><span class="pre">plus</span></code> halo lines on the other (The minus and plus indicate the sides
close to index 0 and the last index of the dimension,
respectively). The beginning of the inner region is marked by <code class="docutils literal notranslate"><span class="pre">begin</span></code>
and its ending by <code class="docutils literal notranslate"><span class="pre">end</span></code>. The end is inclusive, meaning that the index
specified by it, is part of the inner region. Another value is
necessary, which has to be larger than <code class="docutils literal notranslate"><span class="pre">end</span> <span class="pre">-</span> <span class="pre">begin</span> <span class="pre">+</span> <span class="pre">1</span> <span class="pre">+</span> <span class="pre">minus</span> <span class="pre">+</span> <span class="pre">plus</span></code>, and
is the <code class="docutils literal notranslate"><span class="pre">total_length</span></code>. This parameter is the equivalent of the
“leading dimension” in BLAS. With these five numbers we can describe
arbitrary dimensions, with paddings on the left and on the right, such
as the example in <a class="reference internal" href="#fig-halo-descriptor"><span class="std std-numref">Fig. 6</span></a>.</p>
<p>The interface for specifying a halo descriptor is fairly simple, where
the name of arguments should be self-explanatory:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">halo_descriptor</span><span class="p">(</span><span class="n">uint_t</span><span class="w"> </span><span class="n">minus</span><span class="p">,</span><span class="w"> </span><span class="n">uint_t</span><span class="w"> </span><span class="n">plus</span><span class="p">,</span><span class="w"> </span><span class="n">uint_t</span><span class="w"> </span><span class="n">begin</span><span class="p">,</span><span class="w"> </span><span class="n">uint_t</span><span class="w"> </span><span class="n">end</span><span class="p">,</span><span class="w"> </span><span class="n">uint_t</span><span class="w"> </span><span class="n">total_length</span><span class="p">)</span><span class="w"></span>
</pre></div>
</div>
<div class="admonition-todo admonition" id="id17">
<p class="admonition-title">Todo</p>
<p>annotate example with minus plus begin and end</p>
</div>
<figure class="align-default" id="id23">
<span id="fig-halo-descriptor"></span><a class="reference internal image-reference" href="../_images/halo_descriptor.png"><img alt="../_images/halo_descriptor.png" src="../_images/halo_descriptor.png" style="width: 480.5px; height: 284.5px;" /></a>
<figcaption>
<p><span class="caption-number">Fig. 6 </span><span class="caption-text">Example halo descriptor with one halo point on the left and two on the right.</span><a class="headerlink" href="#id23" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
</section>
<section id="gcl-communication-module">
<span id="id18"></span><h3>GCL Communication Module<a class="headerlink" href="#gcl-communication-module" title="Permalink to this heading"></a></h3>
<p>Now we are ready to describe the <a class="reference internal" href="../glossary/glossary.html#term-Halo-Exchange"><span class="xref std std-term">Halo Exchange</span></a> patterns objects. The first one is <code class="docutils literal notranslate"><span class="pre">halo_exchange_dynamic_ut</span></code>. The <code class="docutils literal notranslate"><span class="pre">ut</span></code> suffix stands for <code class="docutils literal notranslate"><span class="pre">uniform</span> <span class="pre">types</span></code>, meaning that the data fields that this object will manage must all store the same value types, that are declared at instantiation time. The domain decomposition goes up to three dimensions and the data to be exchanged contained in 3 dimensional arrays (lower dimensions can be handled by setting the missing dimensions to 1). Being designed for three dimensional data, the layout maps have three elements (refer to :numref:storage-info for more information).</p>
<p>The type of the object is defined as in this example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">pattern_type</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">gcl</span><span class="o">::</span><span class="nl">halo_exchange_dynamic_ut</span><span class="o">&lt;</span><span class="k">layout_map</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="p">,</span><span class="w"></span>
<span class="w">                     </span><span class="k">layout_map</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="p">,</span><span class="w"> </span><span class="n">value_type</span><span class="p">,</span><span class="w"> </span><span class="n">gcl</span><span class="o">::</span><span class="n">cpu</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>The template arguments are:</p>
<ol class="arabic simple">
<li><p>the layout if the data;</p></li>
<li><p>the mapping between the data dimensions and processing <a class="reference internal" href="../glossary/glossary.html#term-Grid"><span class="xref std std-term">Grid</span></a>, as described above (leave it as <code class="docutils literal notranslate"><span class="pre">layout_map&lt;0,</span> <span class="pre">1,</span> <span class="pre">2&gt;</span></code> if in doubt);</p></li>
<li><p>the type of the values to be exchanged;</p></li>
<li><p>the place where the data lives and for which the code is optimized. The options for this arguments are <code class="docutils literal notranslate"><span class="pre">gcl::gpu</span></code> and <code class="docutils literal notranslate"><span class="pre">gcl::cpu</span></code>.</p></li>
</ol>
<p>The <a class="reference internal" href="../glossary/glossary.html#term-Halo-Exchange"><span class="xref std std-term">Halo Exchange</span></a> object can be instantiated as:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">pattern_type</span><span class="w"> </span><span class="nf">he</span><span class="p">(</span><span class="n">pattern_type</span><span class="o">::</span><span class="n">grid_type</span><span class="o">::</span><span class="n">period_type</span><span class="p">(</span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="nb">false</span><span class="p">,</span><span class="w"> </span><span class="nb">true</span><span class="p">),</span><span class="w"> </span><span class="n">CartComm</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>Where <code class="docutils literal notranslate"><span class="pre">period_type</span></code> indicates whether the corresponding dimensions are
periodic or not. <code class="docutils literal notranslate"><span class="pre">CartComm</span></code> is the MPI communicator describing the
computing grid.</p>
<p>After the object has been instantiated, the
user registers the halos for the corresponding dimension and the five
numbers we described above, for the three dimensions (0 is the first
dimension).</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">he</span><span class="p">.</span><span class="n">add_halo</span><span class="o">&lt;</span><span class="mi">0</span><span class="o">&gt;</span><span class="p">(</span><span class="n">minus0</span><span class="p">,</span><span class="w"> </span><span class="n">plus0</span><span class="p">,</span><span class="w"> </span><span class="n">begin0</span><span class="p">,</span><span class="w"> </span><span class="n">end0</span><span class="p">,</span><span class="w"> </span><span class="n">len0</span><span class="p">);</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">add_halo</span><span class="o">&lt;</span><span class="mi">1</span><span class="o">&gt;</span><span class="p">(</span><span class="n">minus1</span><span class="p">,</span><span class="w"> </span><span class="n">plus1</span><span class="p">,</span><span class="w"> </span><span class="n">begin1</span><span class="p">,</span><span class="w"> </span><span class="n">end1</span><span class="p">,</span><span class="w"> </span><span class="n">len1</span><span class="p">);</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">add_halo</span><span class="o">&lt;</span><span class="mi">2</span><span class="o">&gt;</span><span class="p">(</span><span class="n">minus2</span><span class="p">,</span><span class="w"> </span><span class="n">plus2</span><span class="p">,</span><span class="w"> </span><span class="n">begin2</span><span class="p">,</span><span class="w"> </span><span class="n">end2</span><span class="p">,</span><span class="w"> </span><span class="n">len2</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>When the registration is done a setup function must be called before
running data exchange. The argument in the set up function is the
maximum number of data arrays that the pattern will exchange in a
single step. In this example we set it to 3, so that exchanging more
than 3 fields will lead to a runtime error. Be aware that setting a
larger number of supported fields leads to larger memory allocations.
The code looks like:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">he</span><span class="p">.</span><span class="n">setup</span><span class="p">(</span><span class="mi">3</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>Now we are ready to exchange the data, by passing (up to) three
pointers to the data to pack, then calling exchange and then unpack
into the destination data, as in the following example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">he</span><span class="p">.</span><span class="n">pack</span><span class="p">(</span><span class="n">array0</span><span class="p">,</span><span class="w"> </span><span class="n">array1</span><span class="p">,</span><span class="w"> </span><span class="n">array2</span><span class="p">);</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">start_exchange</span><span class="p">();</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">wait</span><span class="p">();</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">unpack</span><span class="p">(</span><span class="n">array0</span><span class="p">,</span><span class="w"> </span><span class="n">array1</span><span class="p">,</span><span class="w"> </span><span class="n">array2</span><span class="p">)</span><span class="w"></span>
</pre></div>
</div>
<p>Alternatively, the pointers can be put in a <code class="docutils literal notranslate"><span class="pre">std::vector&lt;value_type*&gt;</span></code> so that the code would look like:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">he</span><span class="p">.</span><span class="n">pack</span><span class="p">(</span><span class="n">vector_of_pointers</span><span class="p">);</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">start_exchange</span><span class="p">();</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">wait</span><span class="p">();</span><span class="w"></span>
<span class="n">he</span><span class="p">.</span><span class="n">unpack</span><span class="p">(</span><span class="n">vector_of_pointers</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>An alternative pattern supporting different element types is:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">pattern_type</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nl">halo_exchange_generic</span><span class="o">&lt;</span><span class="k">layout_map</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="o">&gt;</span><span class="p">,</span><span class="w"> </span><span class="n">arch_type</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>Now the <a class="reference internal" href="../glossary/glossary.html#term-Layout-Map"><span class="xref std std-term">Layout Map</span></a> in the type is the mapping of dimensions to
the computing grid (the number of dimensions is 3, so the layout map
has three elements), and arch_type is either <code class="docutils literal notranslate"><span class="pre">gcl::gpu</span></code> or <code class="docutils literal notranslate"><span class="pre">gcl::cpu</span></code>.</p>
<p>The construction of the object is identical to the previous one, but
the set-up somewhat more complex now, since we have to indicate the
maximum sizes and number of fields we will exchange using this object.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">array</span><span class="o">&lt;</span><span class="k">halo_descriptor</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="o">&gt;</span><span class="w"> </span><span class="n">halo_dsc</span><span class="p">;</span><span class="w"></span>
<span class="n">halo_dsc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">halo_descriptor</span><span class="p">(</span><span class="n">H1</span><span class="p">,</span><span class="w"> </span><span class="n">H1</span><span class="p">,</span><span class="w"> </span><span class="n">H1</span><span class="p">,</span><span class="w"> </span><span class="n">DIM1</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">H1</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">DIM1</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">H1</span><span class="p">);</span><span class="w"></span>
<span class="n">halo_dsc</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">halo_descriptor</span><span class="p">(</span><span class="n">H2</span><span class="p">,</span><span class="w"> </span><span class="n">H2</span><span class="p">,</span><span class="w"> </span><span class="n">H2</span><span class="p">,</span><span class="w"> </span><span class="n">DIM2</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">H2</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">DIM2</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">H2</span><span class="p">);</span><span class="w"></span>
<span class="n">halo_dsc</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">halo_descriptor</span><span class="p">(</span><span class="n">H3</span><span class="p">,</span><span class="w"> </span><span class="n">H3</span><span class="p">,</span><span class="w"> </span><span class="n">H3</span><span class="p">,</span><span class="w"> </span><span class="n">DIM3</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">H3</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">DIM3</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">H3</span><span class="p">);</span><span class="w"></span>

<span class="n">he</span><span class="p">.</span><span class="n">setup</span><span class="p">(</span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="c1">// maximum number of fields</span>
<span class="w">         </span><span class="n">field_on_the_fly</span><span class="o">&lt;</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="n">layoutmap</span><span class="p">,</span><span class="w"> </span><span class="n">pattern_type</span><span class="o">::</span><span class="n">traits</span><span class="o">&gt;</span><span class="p">(</span><span class="n">null_ptr</span><span class="p">,</span><span class="w"> </span><span class="n">halo_dsc</span><span class="p">),</span><span class="w"></span>
<span class="w">         </span><span class="k">sizeof</span><span class="p">(</span><span class="n">biggest_type_to_be_used</span><span class="p">));</span><span class="w"> </span><span class="c1">// Estimates the sizes</span>
</pre></div>
</div>
<p>The halo descriptors above indicate the largest arrays the user will
exchange, while the <code class="docutils literal notranslate"><span class="pre">field_on_the_fly</span></code> specify a type and layout
(and mandatory traits). The type does not have any effect here, and
neither the layout. The traits are important, and the halos are
essential.  With this pattern, the user needs to indicate what is the
size of the largest value type they will exchange.</p>
<p>When using the pattern, each data field should be wrapped into a
<code class="docutils literal notranslate"><span class="pre">field_on_the_fly</span></code> object, such as</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">field_on_the_fly</span><span class="o">&lt;</span><span class="n">value_type1</span><span class="p">,</span><span class="w"> </span><span class="n">layoutmap1</span><span class="p">,</span><span class="w"> </span><span class="n">pattern_type</span><span class="o">::</span><span class="n">traits</span><span class="o">&gt;</span><span class="w"> </span><span class="n">field1</span><span class="p">(</span><span class="w"></span>
<span class="w">    </span><span class="n">ptr1</span><span class="p">,</span><span class="w"> </span><span class="n">halo_dsc1</span><span class="p">);</span><span class="w"></span>
<span class="n">field_on_the_fly</span><span class="o">&lt;</span><span class="n">value_type2</span><span class="p">,</span><span class="w"> </span><span class="n">layoutmap2</span><span class="p">,</span><span class="w"> </span><span class="n">pattern_type</span><span class="o">::</span><span class="n">traits</span><span class="o">&gt;</span><span class="w"> </span><span class="n">field2</span><span class="p">(</span><span class="w"></span>
<span class="w">    </span><span class="n">ptr2</span><span class="p">,</span><span class="w"> </span><span class="n">halo_dsc2</span><span class="p">);</span><span class="w"></span>
<span class="n">field_on_the_fly</span><span class="o">&lt;</span><span class="n">value_type3</span><span class="p">,</span><span class="w"> </span><span class="n">layoutmap3</span><span class="p">,</span><span class="w"> </span><span class="n">pattern_type</span><span class="o">::</span><span class="n">traits</span><span class="o">&gt;</span><span class="w"> </span><span class="n">field3</span><span class="p">(</span><span class="w"></span>
<span class="w">    </span><span class="n">ptr3</span><span class="p">,</span><span class="w"> </span><span class="n">halo_dsc3</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>Now each field can have different types and layouts, and halo
descriptors. The exchange happens very similarly as before:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">he</span><span class="p">.</span><span class="n">pack</span><span class="p">(</span><span class="n">field1</span><span class="p">,</span><span class="w"> </span><span class="n">field2</span><span class="p">,</span><span class="w"> </span><span class="n">field3</span><span class="p">);</span><span class="w"></span>

<span class="n">he</span><span class="p">.</span><span class="n">exchange</span><span class="p">();</span><span class="w"></span>

<span class="n">he</span><span class="p">.</span><span class="n">unpack</span><span class="p">(</span><span class="n">field1</span><span class="p">,</span><span class="w"> </span><span class="n">field2</span><span class="p">,</span><span class="w"> </span><span class="n">field3</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The interface accepting a <code class="docutils literal notranslate"><span class="pre">std::vector</span></code> also works for this pattern (in case all the
fields have the same type).</p>
</section>
</section>
<section id="distributed-boundary-conditions">
<span id="id19"></span><h2>Distributed Boundary Conditions<a class="headerlink" href="#distributed-boundary-conditions" title="Permalink to this heading"></a></h2>
<section id="design-principles">
<h3>Design Principles:<a class="headerlink" href="#design-principles" title="Permalink to this heading"></a></h3>
<ul class="simple">
<li><p>When doing expandable parameters, the user may want to apply BCs and perform communications on a sub-set of the <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> collected in these data representations. For this reason an interface for applying distributed boundary conditions takes single data-stores only.</p></li>
<li><p>The user may want to apply different BCs to the same data-store at different times during an executions, so the binding between BCs and data-stores should be done at member-function level, not at class level, in order to remove the need for instantiation of heavy objects like <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a>-updates.</p></li>
<li><p>The same holds for the <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> to be exchanged: we need to plug the <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> at the last minute before doing the packing/unpacking and boundary apply. The requirement given by the underlying communication layer is that the number of data fields to be exchanged must be less than or equal to the maximum number of data fields specified at construction time.</p></li>
<li><p>The <a class="reference internal" href="../glossary/glossary.html#term-Halo-Exchange"><span class="xref std std-term">Halo Exchange</span></a> patterns are quite heavy objects so they have to be constructed and passed around as references. The <code class="docutils literal notranslate"><span class="pre">setup</span></code> needs to be executed only once to prevent memory leaks.</p></li>
<li><p>The <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> information for communication could be derived by a <code class="docutils literal notranslate"><span class="pre">storage_info</span></code> class, but there may be cases in which a separate <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> information can be provided, and different <code class="docutils literal notranslate"><span class="pre">storage_info</span></code> s (with different indices, for instance) may have the same communication requirements (for instance in cases of implicit staggering). For this reason the <cite>halo_descriptor</cite> is passed explicitly to the distributed boundary construction interface.</p></li>
<li><p>The <code class="docutils literal notranslate"><span class="pre">value_type</span></code> should be passed as an additional template parameter to the distributed boundaries interfaces. The <code class="docutils literal notranslate"><span class="pre">value_type</span></code> is used to compute the sizes of the buffers and the data movement operations needed by communication.</p></li>
</ul>
</section>
<section id="communication-traits">
<h3>Communication Traits<a class="headerlink" href="#communication-traits" title="Permalink to this heading"></a></h3>
<p>Communication traits helps the distributed boundary condition interface to customize itself to the need of the user. A general communication traits class is available in <code class="docutils literal notranslate"><span class="pre">distributed_boundaries/comm_traits.hpp</span></code>. The traits required by the distributed boundaries interface, as provided by <cite>GridTools</cite>, are listed below here.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">StorageType</span><span class="p">,</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">Arch</span><span class="o">&gt;</span><span class="w"></span>
<span class="k">struct</span><span class="w"> </span><span class="nc">comm_traits</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">  </span><span class="k">using</span><span class="w"> </span><span class="n">proc_layout</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">gridtools</span><span class="o">::</span><span class="k">layout_map</span><span class="o">&lt;</span><span class="p">...</span><span class="o">&gt;</span><span class="p">;</span><span class="w"> </span><span class="c1">// Layout of the processing grid to relate the data layout to the distribution of data</span>
<span class="w">  </span><span class="k">using</span><span class="w"> </span><span class="n">proc_grid_type</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">MPI_3D_process_grid_t</span><span class="p">;</span><span class="w"> </span><span class="c1">// Type of the computing grid</span>
<span class="w">  </span><span class="k">using</span><span class="w"> </span><span class="n">comm_arch_type</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">Arch</span><span class="p">;</span><span class="w"> </span><span class="c1">// Architecture for the communication pattern</span>
<span class="w">  </span><span class="n">compute_arch</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">backend</span><span class="o">::</span><span class="p">...;</span><span class="w"> </span><span class="c1">// Architecture of the stencil/boundary condition backend</span>
<span class="w">  </span><span class="k">static</span><span class="w"> </span><span class="k">constexpr</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">version</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">gridtools</span><span class="o">::</span><span class="n">packing_version</span><span class="o">::</span><span class="p">...;</span><span class="w"> </span><span class="c1">// Packing/Unpacking version</span>
<span class="w">  </span><span class="k">using</span><span class="w"> </span><span class="n">data_layout</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">StorageType</span><span class="o">::</span><span class="n">storage_info_t</span><span class="o">::</span><span class="n">layout_t</span><span class="p">;</span><span class="w"> </span><span class="c1">// Layout of data</span>
<span class="w">  </span><span class="k">using</span><span class="w"> </span><span class="n">value_type</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="k">typename</span><span class="w"> </span><span class="nc">StorageType</span><span class="o">::</span><span class="n">data_t</span><span class="p">;</span><span class="w"> </span><span class="c1">// Value Type</span>
<span class="p">};</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="binding-boundaries-and-communication">
<h3>Binding Boundaries and Communication<a class="headerlink" href="#binding-boundaries-and-communication" title="Permalink to this heading"></a></h3>
<p><cite>GridTools</cite> provides a facility for applying boundary conditions. The distributed boundaries interfaces uses this facility underneath. The boundary application in <cite>GridTools</cite> accept specific boundary classes that specify how to deal with boundaries in different directions and predicated to deal with periodicity and domain decomposition. The latter will be dealt by the distributed boundary interfaces (refer to the boundary condition interfaces for more details).</p>
<p>The distributed boundaries interface require a user to specify which <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> required communication and which require also boundary conditions, and in the latter case what boundary functions to use.</p>
<p>The way of specifying this is through the function <code class="docutils literal notranslate"><span class="pre">bind_bc</span></code> which has the following signature:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">unspecified_type</span><span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">bind_bc</span><span class="p">(</span><span class="n">boundary_class</span><span class="p">,</span><span class="w"> </span><span class="n">data_stores</span><span class="p">,</span><span class="w"> </span><span class="p">...);</span><span class="w"></span>
</pre></div>
</div>
<p>The number of <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> is dictated by the <code class="docutils literal notranslate"><span class="pre">boundary_class::operator()</span></code>, that is user defined (or provided by <cite>GridTools</cite>).</p>
<p>The <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> specified in the function call will be passed to the <code class="docutils literal notranslate"><span class="pre">boundary_class</span></code> and also used in <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a>-update operations.</p>
<p>However, some data fields used in boundary conditions may be read-only and should not be passed to the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a>-update operation, both for avoiding unnecessary operations and to limit the amount of memory used by the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a>-update layer. For this reason the <code class="docutils literal notranslate"><span class="pre">data_store</span></code> passed the <code class="docutils literal notranslate"><span class="pre">bind_bc</span></code> can actually be <code class="docutils literal notranslate"><span class="pre">std::placeholders</span></code>. Only the actual <code class="docutils literal notranslate"><span class="pre">data_store</span></code> specified in the <code class="docutils literal notranslate"><span class="pre">bind_bc</span></code> call will be passed to the communication layer. To bind the <code class="docutils literal notranslate"><span class="pre">placeholders</span></code> to actual <code class="docutils literal notranslate"><span class="pre">data_store</span></code> the user must bind then using <code class="docutils literal notranslate"><span class="pre">.associate(data_stores...)</span></code> with the same mechanism used in <code class="docutils literal notranslate"><span class="pre">std::bind</span></code> as in the following example, in which <code class="docutils literal notranslate"><span class="pre">data_store</span></code> <code class="docutils literal notranslate"><span class="pre">c</span></code> is associated to placeholder <code class="docutils literal notranslate"><span class="pre">_1</span></code>.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="k">namespace</span><span class="w"> </span><span class="nn">std</span><span class="o">::</span><span class="nn">placeholders</span><span class="p">;</span><span class="w"></span>
<span class="n">bind_bc</span><span class="p">(</span><span class="n">copy_boundary</span><span class="p">{},</span><span class="w"> </span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">_1</span><span class="p">).</span><span class="n">associate</span><span class="p">(</span><span class="n">c</span><span class="p">)</span><span class="w"></span>
</pre></div>
</div>
<p>This example, copies the boundary of <code class="docutils literal notranslate"><span class="pre">c</span></code> into <code class="docutils literal notranslate"><span class="pre">b</span></code>, and performs the halo exchanges for <code class="docutils literal notranslate"><span class="pre">b</span></code>. The halo exchange will not be executed on <code class="docutils literal notranslate"><span class="pre">c</span></code>, which is <cite>read only</cite> in this call.</p>
<p>If halo exchanges should be applied to both fields, and the boundary of <code class="docutils literal notranslate"><span class="pre">`c</span></code> should be copied into <code class="docutils literal notranslate"><span class="pre">b</span></code>, both fields should be used used in <code class="docutils literal notranslate"><span class="pre">bind_bc</span></code>:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">bind_bc</span><span class="p">(</span><span class="n">copy_boundary</span><span class="p">{},</span><span class="w"> </span><span class="n">a</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="distributed-boundaries">
<h3>Distributed Boundaries<a class="headerlink" href="#distributed-boundaries" title="Permalink to this heading"></a></h3>
<p>The distributed boundaries class takes the communication traits as template argument. In the next example we use the communication traits class provided by <cite>GridTools</cite>, and <code class="docutils literal notranslate"><span class="pre">communication_arch</span></code> is one of the <a class="reference internal" href="../glossary/glossary.html#term-GCL"><span class="xref std std-term">GCL</span></a> specifiers of where the data accessed by a <a class="reference internal" href="../glossary/glossary.html#term-Halo-Exchange"><span class="xref std std-term">Halo Exchange</span></a> object reside.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">using</span><span class="w"> </span><span class="n">dbs_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">distributed_boundaries</span><span class="o">&lt;</span><span class="n">comm_traits</span><span class="o">&lt;</span><span class="n">storage_type</span><span class="p">,</span><span class="w"> </span><span class="n">communication_arch</span><span class="o">&gt;&gt;</span><span class="p">;</span><span class="w"></span>
</pre></div>
</div>
<p>During construction more information is required about the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> structure. We use here the usual <a class="reference internal" href="../glossary/glossary.html#term-Halo-Descriptor"><span class="xref std std-term">Halo Descriptor</span></a>.</p>
<p>The user needs also to indicate which dimensions are periodic (refer to <a class="reference internal" href="#gcl-communication-module"><span class="std std-ref">GCL Communication Module</span></a> for more information), and this is done using another <cite>GridTools</cite> facility which is the <code class="docutils literal notranslate"><span class="pre">boollist</span></code>. Finally, to let the library compute the right amount of memory to allocate before hand, the maximum number of fields to be exchanged in one call have to be specified. The code showing an example of how to do it follows:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="k">halo_descriptor</span><span class="w"> </span><span class="n">di</span><span class="p">{</span><span class="n">halo_sx0</span><span class="p">,</span><span class="w"> </span><span class="n">halo_dx0</span><span class="p">,</span><span class="w"> </span><span class="n">begin0</span><span class="p">,</span><span class="w"> </span><span class="n">end0</span><span class="p">,</span><span class="w"> </span><span class="n">len0</span><span class="p">};</span><span class="w"></span>
<span class="k">halo_descriptor</span><span class="w"> </span><span class="n">dj</span><span class="p">{</span><span class="n">halo_sx1</span><span class="p">,</span><span class="w"> </span><span class="n">halo_dx1</span><span class="p">,</span><span class="w"> </span><span class="n">begin1</span><span class="p">,</span><span class="w"> </span><span class="n">end1</span><span class="p">,</span><span class="w"> </span><span class="n">len1</span><span class="p">};</span><span class="w"></span>
<span class="k">halo_descriptor</span><span class="w"> </span><span class="n">dk</span><span class="p">{</span><span class="n">halo_sx2</span><span class="p">,</span><span class="w"> </span><span class="n">halo_dx2</span><span class="p">,</span><span class="w"> </span><span class="n">begin2</span><span class="p">,</span><span class="w"> </span><span class="n">end2</span><span class="p">,</span><span class="w"> </span><span class="n">len2</span><span class="p">};</span><span class="w"></span>
<span class="n">array</span><span class="o">&lt;</span><span class="k">halo_descriptor</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="o">&gt;</span><span class="w"> </span><span class="n">halos</span><span class="p">{</span><span class="n">di</span><span class="p">,</span><span class="w"> </span><span class="n">dj</span><span class="p">,</span><span class="w"> </span><span class="n">dk</span><span class="p">};</span><span class="w"></span>

<span class="n">boollist</span><span class="o">&lt;</span><span class="mi">3</span><span class="o">&gt;</span><span class="w"> </span><span class="n">periodicity</span><span class="p">{</span><span class="n">b0</span><span class="p">,</span><span class="w"> </span><span class="n">b1</span><span class="p">,</span><span class="w"> </span><span class="n">b2</span><span class="p">};</span><span class="w"> </span><span class="c1">// b0, b1, b2 are booleans. If true it will indicate that the corresponding dimension is periodic across the grid of processors.</span>

<span class="kt">int</span><span class="w"> </span><span class="n">max_ds</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">4</span><span class="p">;</span><span class="w"> </span><span class="c1">// maximum number of data stores to be used in a halo_update operation</span>

<span class="n">dbs_t</span><span class="w"> </span><span class="n">dbs</span><span class="p">{</span><span class="n">halos</span><span class="p">,</span><span class="w"> </span><span class="n">periodicity</span><span class="p">,</span><span class="w"> </span><span class="n">max_ds</span><span class="p">,</span><span class="w"> </span><span class="n">MPI_COMMUNICATOR</span><span class="p">};</span><span class="w"></span>
</pre></div>
</div>
<p>Above here the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> are the local <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> sizes, which are usually the tiles of a domain decomposed <cite>global domain</cite>, which has <cite>global boundaries</cite>. The idea is to apply the boundary conditions to the global boundaries while performing <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> updates for the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> regions between sub-domains of the domain decomposed global domain.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">distributed_boundary</span></code> object allows the user to query the properties of the grid of processes, for instance the coordinates of the current process and the size of the computing grid.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="kt">int</span><span class="w"> </span><span class="n">pi</span><span class="p">,</span><span class="w"> </span><span class="n">pj</span><span class="p">,</span><span class="w"> </span><span class="n">pk</span><span class="p">;</span><span class="w"></span>
<span class="n">dist_boundaries</span><span class="p">.</span><span class="n">proc_grid</span><span class="p">().</span><span class="n">coords</span><span class="p">(</span><span class="n">pi</span><span class="p">,</span><span class="w"> </span><span class="n">pj</span><span class="p">,</span><span class="w"> </span><span class="n">pk</span><span class="p">);</span><span class="w"> </span><span class="c1">// Coordinates of current process</span>
<span class="kt">int</span><span class="w"> </span><span class="n">PI</span><span class="p">,</span><span class="w"> </span><span class="n">PJ</span><span class="p">,</span><span class="w"> </span><span class="n">PK</span><span class="p">;</span><span class="w"></span>
<span class="n">dist_boundaries</span><span class="p">.</span><span class="n">proc_grid</span><span class="p">().</span><span class="n">dims</span><span class="p">(</span><span class="n">PI</span><span class="p">,</span><span class="w"> </span><span class="n">PJ</span><span class="p">,</span><span class="w"> </span><span class="n">PK</span><span class="p">);</span><span class="w"> </span><span class="c1">// Sizes of the current grid of processes</span>
</pre></div>
</div>
<p>When invoking the boundary application and <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a>-update operations the user calls the <code class="docutils literal notranslate"><span class="pre">exchange</span></code> member of <code class="docutils literal notranslate"><span class="pre">distributed_boundaries</span></code>. The arguments of <code class="docutils literal notranslate"><span class="pre">exchange</span></code> are either <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> stores or <code class="docutils literal notranslate"><span class="pre">bind_bc</span></code> objects which associate a boundary condition to <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a>. The <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Stores</span></a> passed directly to the <code class="docutils literal notranslate"><span class="pre">exchange</span></code> methods have their halo updated according to the halo and periodicity information specified at <code class="docutils literal notranslate"><span class="pre">distributed_boundaries</span></code> object construction.Arguments created with <code class="docutils literal notranslate"><span class="pre">bind_bc</span></code> are updated as mentioned above; halo exchanges are only applied if the fields are inside <code class="docutils literal notranslate"><span class="pre">bind_bc</span></code>, but not in <code class="docutils literal notranslate"><span class="pre">associate</span></code>.</p>
<p>Next, we show a complete example where two boundary are applied using a fixed value on <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Store</span></a> <code class="docutils literal notranslate"><span class="pre">a</span></code> and a <code class="docutils literal notranslate"><span class="pre">copy_boundary</span></code> to copy the value of <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Store</span></a> <code class="docutils literal notranslate"><span class="pre">c</span></code> into <a class="reference internal" href="../glossary/glossary.html#term-Data-Store"><span class="xref std std-term">Data Store</span></a> <code class="docutils literal notranslate"><span class="pre">b</span></code> (refer to <a class="reference internal" href="#gcl-communication-module"><span class="std std-ref">GCL Communication Module</span></a>). The halos of data store <code class="docutils literal notranslate"><span class="pre">c</span></code> will not be exchange; this field serves as source of data for the <code class="docutils literal notranslate"><span class="pre">copy_boundary</span></code>. Three fields will have their <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> updated by the next example, namely <code class="docutils literal notranslate"><span class="pre">a</span></code>, <code class="docutils literal notranslate"><span class="pre">b</span></code> and <code class="docutils literal notranslate"><span class="pre">d</span></code>:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">dist_boundaries</span><span class="p">.</span><span class="n">exchange</span><span class="p">(</span><span class="n">bind_bc</span><span class="p">(</span><span class="n">value_boundary</span><span class="o">&lt;</span><span class="kt">double</span><span class="o">&gt;</span><span class="p">{</span><span class="mf">3.14</span><span class="p">},</span><span class="w"> </span><span class="n">a</span><span class="p">),</span><span class="w"> </span><span class="n">bind_bc</span><span class="p">(</span><span class="n">copy_boundary</span><span class="p">{},</span><span class="w"> </span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">_1</span><span class="p">).</span><span class="n">associate</span><span class="p">(</span><span class="n">c</span><span class="p">),</span><span class="w"> </span><span class="n">d</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>An additional facility provided is an alternative to the <code class="docutils literal notranslate"><span class="pre">exchange</span></code> method. This is used to skip the <a class="reference internal" href="../glossary/glossary.html#term-Halo"><span class="xref std std-term">Halo</span></a> updates altogether, and it is called <code class="docutils literal notranslate"><span class="pre">boundary_only</span></code>, and the code to use it is identical to the previous example, barring the function name:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">dist_boundaries</span><span class="p">.</span><span class="n">boundary_only</span><span class="p">(</span><span class="n">bind_bc</span><span class="p">(</span><span class="n">value_boundary</span><span class="o">&lt;</span><span class="kt">double</span><span class="o">&gt;</span><span class="p">{</span><span class="mf">3.14</span><span class="p">},</span><span class="w"> </span><span class="n">a</span><span class="p">),</span><span class="w"> </span><span class="n">bind_bc</span><span class="p">(</span><span class="n">copy_boundary</span><span class="p">{},</span><span class="w"> </span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">_1</span><span class="p">).</span><span class="n">associate</span><span class="p">(</span><span class="n">c</span><span class="p">),</span><span class="w"> </span><span class="n">d</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>This function will not do any halo exchange, but only update the boundaries of <code class="docutils literal notranslate"><span class="pre">a</span></code> and <code class="docutils literal notranslate"><span class="pre">b</span></code>. Passing <code class="docutils literal notranslate"><span class="pre">d</span></code> is possible, but redundant as no boundary is given.</p>
</section>
</section>
<section id="interfacing-to-other-programming-languages">
<h2>Interfacing to other programming languages<a class="headerlink" href="#interfacing-to-other-programming-languages" title="Permalink to this heading"></a></h2>
<p><cite>GridTools</cite> provides an easy macro interface to generate bindings to C and Fortran. This library is available separately
at <a class="reference external" href="https://github.com/GridTools/cpp_bindgen">GridTools/cpp_bindgen</a>.</p>
<p>To make the cpp_bindgen library available the recommended way is to use CMake’s FetchContent as follows</p>
<div class="highlight-CMake notranslate"><div class="highlight"><pre><span></span><span class="nb">include</span><span class="p">(</span><span class="s">FetchContent</span><span class="p">)</span>
<span class="nb">FetchContent_Declare</span><span class="p">(</span>
<span class="w">  </span><span class="s">cpp_bindgen</span>
<span class="w">  </span><span class="s">GIT_REPOSITORY</span><span class="w"> </span><span class="s">https://github.com/GridTools/cpp_bindgen.git</span>
<span class="w">  </span><span class="s">GIT_TAG</span><span class="w">        </span><span class="s">master</span><span class="w"> </span><span class="c"># consider replacing master by a tagged version</span>
<span class="p">)</span>
<span class="nb">FetchContent_MakeAvailable</span><span class="p">(</span><span class="s">cpp_bindgen</span><span class="p">)</span>
</pre></div>
</div>
<p>Suppose, the user wants to export the function <code class="docutils literal notranslate"><span class="pre">add_impl</span></code>.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="kt">int</span><span class="w"> </span><span class="nf">add_impl</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="n">l</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">r</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="k">return</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">r</span><span class="p">;</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>
</pre></div>
</div>
<p>The macros <code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_*</span></code> provide ways to generate bindings to functions. The different
flavours of this macro are explained below. The macro generates a wrapper around the function
<code class="docutils literal notranslate"><span class="pre">add_impl</span></code> which is called <code class="docutils literal notranslate"><span class="pre">add</span></code> and registers the function to be exported.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;cpp_bindgen/export.hpp&gt;</span><span class="cp"></span>
<span class="n">BINDGEN_EXPORT_BINDING_2</span><span class="p">(</span><span class="n">add</span><span class="p">,</span><span class="w"> </span><span class="n">add_impl</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The user can generate a C header and a Fortran module that matches this header by
adding a call to <code class="docutils literal notranslate"><span class="pre">bindgen_add_library</span></code> in his CMake project:</p>
<div class="highlight-CMake notranslate"><div class="highlight"><pre><span></span><span class="nb">bindgen_add_library</span><span class="p">(</span><span class="s">add_lib</span><span class="w"> </span><span class="s">SOURCES</span><span class="w"> </span><span class="s">add.cpp</span><span class="p">)</span>
</pre></div>
</div>
<p>This will generate a library <code class="docutils literal notranslate"><span class="pre">add_lib</span></code> which contains the exported symbol <code class="docutils literal notranslate"><span class="pre">add</span></code>,
and it will generate a target <code class="docutils literal notranslate"><span class="pre">add_lib_declarations</span></code> that generates the files
<code class="docutils literal notranslate"><span class="pre">add_lib.h</span></code> and <code class="docutils literal notranslate"><span class="pre">add_lib.f90</span></code> containing the bindings that can be used from C
and Fortran.</p>
<p>The C header contains the exported function (boilerplate code removed):</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="kt">int</span><span class="w"> </span><span class="nf">add</span><span class="p">(</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The generated Fortran module contains the corresponding declaration:</p>
<div class="highlight-fortran notranslate"><div class="highlight"><pre><span></span><span class="k">module </span><span class="n">add_lib</span><span class="w"></span>
<span class="k">implicit none</span>
<span class="k">  interface</span>
<span class="k">    </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">)</span><span class="w"> </span><span class="k">function </span><span class="n">add</span><span class="p">(</span><span class="n">arg0</span><span class="p">,</span><span class="w"> </span><span class="n">arg1</span><span class="p">)</span><span class="w"> </span><span class="k">bind</span><span class="p">(</span><span class="n">c</span><span class="p">)</span><span class="w"></span>
<span class="w">      </span><span class="k">use </span><span class="nb">iso_c_binding</span>
<span class="nb">      </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">),</span><span class="w"> </span><span class="k">value</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg0</span><span class="w"></span>
<span class="w">      </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">),</span><span class="w"> </span><span class="k">value</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg1</span><span class="w"></span>
<span class="w">    </span><span class="k">end function</span>
<span class="k">  end interface</span>
<span class="k">end</span><span class="w"></span>
</pre></div>
</div>
<section id="exporting-functions-with-no-array-type-arguments">
<h3>Exporting functions with no array-type arguments<a class="headerlink" href="#exporting-functions-with-no-array-type-arguments" title="Permalink to this heading"></a></h3>
<p>There exist various flavours of these macros. Functions which are non-templated or fully-specialized
can be exported with <code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING</span></code>, for example:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="kt">int</span><span class="w"> </span><span class="nf">add_impl</span><span class="p">(</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="p">}</span><span class="w"></span>
<span class="n">BINDGEN_EXPORT_BINDING_2</span><span class="p">(</span><span class="n">add</span><span class="p">,</span><span class="w"> </span><span class="n">add_impl</span><span class="p">);</span><span class="w"></span>

<span class="k">template</span><span class="w"> </span><span class="o">&lt;</span><span class="k">typename</span><span class="w"> </span><span class="nc">T</span><span class="o">&gt;</span><span class="w"></span>
<span class="n">T</span><span class="w"> </span><span class="n">add_impl</span><span class="p">(</span><span class="n">T</span><span class="p">,</span><span class="w"> </span><span class="n">T</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="p">{};</span><span class="w"> </span><span class="p">}</span><span class="w"></span>
<span class="n">BINDGEN_EXPORT_BINDING</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="n">add_impl</span><span class="o">&lt;</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="o">&gt;</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>All functions exist in two flavours: Either you can pass the number of arguments as part of the name
of the macro (<code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING_2</span></code> stands for two arguments), or you can pass it as a first argument
to the generic <code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING</span></code>. The first flavours exist for up to 9 arguments.</p>
<p>Note that <code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING_X</span></code> requires a name and a function pointer as its arguments.
A lambda cannot be passed as function pointer; thus, the type of the arguments cannot be
deduced. In such cases, the functions can be exported with <code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING_WITH_SIGNATURE_X</span></code>,
which additionally takes the function type as an argument:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">BINDGEN_EXPORT_BINDING_WITH_SIGNATURE_2</span><span class="p">(</span><span class="n">add</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="p">(</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="p">),</span><span class="w"> </span><span class="p">[](</span><span class="kt">int</span><span class="w"> </span><span class="n">l</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">r</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">r</span><span class="p">;</span><span class="w"> </span><span class="p">});</span><span class="w"></span>
</pre></div>
</div>
<p>Templated functions can be exported for a given set of specializations using
<code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_GENERIC_BINDING_X</span></code>. In addition to the function name and the function pointer, it takes a list of
overloads for which the bindings are generated:</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">BINDGEN_EXPORT_GENERIC_BINDING</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="n">add</span><span class="p">,</span><span class="w"> </span><span class="n">add_impl</span><span class="p">,</span><span class="w"> </span><span class="p">(</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="p">)(</span><span class="kt">double</span><span class="p">,</span><span class="w"> </span><span class="kt">int</span><span class="p">));</span><span class="w"></span>
</pre></div>
</div>
<p>In the generated Fortran module, generic bindings will produce an interface combining the different
overloads:</p>
<div class="highlight-fortran notranslate"><div class="highlight"><pre><span></span><span class="k">interface</span>
<span class="k">  </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">)</span><span class="w"> </span><span class="k">function </span><span class="n">add_f0</span><span class="p">(</span><span class="n">arg0</span><span class="p">,</span><span class="w"> </span><span class="n">arg1</span><span class="p">)</span><span class="w"> </span><span class="k">bind</span><span class="p">(</span><span class="n">c</span><span class="p">)</span><span class="w"></span>
<span class="w">    </span><span class="k">use </span><span class="nb">iso_c_binding</span>
<span class="nb">    </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">),</span><span class="w"> </span><span class="k">value</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg0</span><span class="w"></span>
<span class="w">    </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">),</span><span class="w"> </span><span class="k">value</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg1</span><span class="w"></span>
<span class="w">  </span><span class="k">end function</span>
<span class="k">  </span><span class="kt">real</span><span class="p">(</span><span class="kt">c_double</span><span class="p">)</span><span class="w"> </span><span class="k">function </span><span class="n">add_f1</span><span class="p">(</span><span class="n">arg0</span><span class="p">,</span><span class="w"> </span><span class="n">arg1</span><span class="p">)</span><span class="w"> </span><span class="k">bind</span><span class="p">(</span><span class="n">c</span><span class="p">)</span><span class="w"></span>
<span class="w">    </span><span class="k">use </span><span class="nb">iso_c_binding</span>
<span class="nb">    </span><span class="kt">real</span><span class="p">(</span><span class="kt">c_double</span><span class="p">),</span><span class="w"> </span><span class="k">value</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg0</span><span class="w"></span>
<span class="w">    </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">),</span><span class="w"> </span><span class="k">value</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg1</span><span class="w"></span>
<span class="w">  </span><span class="k">end function</span>
<span class="k">end interface</span>
<span class="k">interface </span><span class="n">add</span><span class="w"></span>
<span class="w">  </span><span class="k">procedure </span><span class="n">add_f0</span><span class="p">,</span><span class="w"> </span><span class="n">add_f1</span><span class="w"></span>
<span class="k">end interface</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="complex-types">
<h3>Complex types<a class="headerlink" href="#complex-types" title="Permalink to this heading"></a></h3>
<p>Only a limited set of types can be passed from Fortran / C through the C bindings interface to C++,
namely integral and floating point types, booleans and pointers to those types.</p>
<p>Array references, <cite>GridTools</cite> storages, and any type that is <cite>fortran_array_bindable</cite>
appear as <code class="docutils literal notranslate"><span class="pre">bindgen_fortran_array_descriptor</span></code> in the C bindings. This structure allows
the user to describe the data that needs to be passed to C++.</p>
<p>It is possible to write bindings to functions that accept or return other types.
During the generation process, they are replaced with pointers to the type <code class="docutils literal notranslate"><span class="pre">bindgen_handle</span></code>.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="w"> </span><span class="n">make_vector_impl</span><span class="p">()</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="p">{};</span><span class="w"> </span><span class="p">}</span><span class="w"></span>
<span class="kt">void</span><span class="w"> </span><span class="n">use_vector_impl</span><span class="p">(</span><span class="n">std</span><span class="o">::</span><span class="n">vector</span><span class="o">&lt;</span><span class="kt">int</span><span class="o">&gt;</span><span class="p">)</span><span class="w"> </span><span class="p">{}</span><span class="w"></span>
<span class="n">BINDGEN_EXPORT_BINDING_0</span><span class="p">(</span><span class="n">make_vector</span><span class="p">,</span><span class="w"> </span><span class="n">make_vector_impl</span><span class="p">);</span><span class="w"></span>
<span class="n">BINDGEN_EXPORT_BINDING_1</span><span class="p">(</span><span class="n">use_vector</span><span class="p">,</span><span class="w"> </span><span class="n">use_vector_impl</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The code above will generate the following signatures in the C-header:</p>
<div class="highlight-C notranslate"><div class="highlight"><pre><span></span><span class="n">bindgen_handle</span><span class="o">*</span><span class="w"> </span><span class="nf">make_vector</span><span class="p">();</span><span class="w"></span>
<span class="kt">void</span><span class="w"> </span><span class="nf">use_vector</span><span class="p">(</span><span class="n">bindgen_handle</span><span class="o">*</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The user needs to make sure that the types that stand behind <code class="docutils literal notranslate"><span class="pre">bindgen_handle</span></code> match, otherwise
an exception will be thrown.</p>
</section>
<section id="exporting-functions-with-array-type-arguments-to-fortran">
<h3>Exporting functions with array-type arguments to Fortran<a class="headerlink" href="#exporting-functions-with-array-type-arguments-to-fortran" title="Permalink to this heading"></a></h3>
<p>Special macros exist to export function that take array-like arguments to Fortran. While the normal
macros export such arguments as <code class="docutils literal notranslate"><span class="pre">bindgen_fortran_array_descriptor</span></code>, the “wrapped” macros create
additional wrappers around the functions that fill the structures themselves.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="kt">void</span><span class="w"> </span><span class="nf">dummy_impl</span><span class="p">(</span><span class="kt">int</span><span class="w"> </span><span class="p">(</span><span class="o">&amp;</span><span class="n">a</span><span class="p">)[</span><span class="mi">2</span><span class="p">][</span><span class="mi">2</span><span class="p">])</span><span class="w"> </span><span class="p">{}</span><span class="w"></span>
<span class="n">BINDGEN_EXPORT_BINDING_WRAPPED_1</span><span class="p">(</span><span class="n">dummy</span><span class="p">,</span><span class="w"> </span><span class="n">dummy_impl</span><span class="p">);</span><span class="w"></span>
</pre></div>
</div>
<p>The function <code class="docutils literal notranslate"><span class="pre">dummy_impl</span></code> is taking a reference to an array. When exporting this function with
<code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING_WRAPPED_X</span></code>, an additional wrapper is generated in the Fortran bindings:</p>
<div class="highlight-fortran notranslate"><div class="highlight"><pre><span></span><span class="k">module </span><span class="n">add_lib</span><span class="w"></span>
<span class="k">implicit none</span>
<span class="k">  interface</span>
<span class="k">    subroutine </span><span class="n">dummy_impl</span><span class="p">(</span><span class="n">arg0</span><span class="p">)</span><span class="w"> </span><span class="k">bind</span><span class="p">(</span><span class="n">c</span><span class="p">,</span><span class="w"> </span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;dummy&quot;</span><span class="p">)</span><span class="w"></span>
<span class="w">      </span><span class="k">use </span><span class="nb">iso_c_binding</span>
<span class="nb">      </span><span class="k">use </span><span class="n">array_descriptor</span><span class="w"></span>
<span class="w">      </span><span class="k">type</span><span class="p">(</span><span class="n">bindgen_fortran_array_descriptor</span><span class="p">)</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg0</span><span class="w"></span>
<span class="w">    </span><span class="k">end subroutine</span>
<span class="k">  end interface</span>
<span class="k">contains</span>
<span class="k">    subroutine </span><span class="n">dummy</span><span class="p">(</span><span class="n">arg0</span><span class="p">)</span><span class="w"></span>
<span class="w">      </span><span class="k">use </span><span class="nb">iso_c_binding</span>
<span class="nb">      </span><span class="k">use </span><span class="n">array_descriptor</span><span class="w"></span>
<span class="w">      </span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">),</span><span class="w"> </span><span class="k">dimension</span><span class="p">(:,:),</span><span class="w"> </span><span class="k">target</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">arg0</span><span class="w"></span>
<span class="w">      </span><span class="k">type</span><span class="p">(</span><span class="n">bindgen_fortran_array_descriptor</span><span class="p">)</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">descriptor0</span><span class="w"></span>

<span class="w">      </span><span class="n">descriptor0</span><span class="p">%</span><span class="n">rank</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="w"></span>
<span class="w">      </span><span class="n">descriptor0</span><span class="p">%</span><span class="k">type</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="w"></span>
<span class="w">      </span><span class="n">descriptor0</span><span class="p">%</span><span class="n">dims</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">reshape</span><span class="p">(</span><span class="nb">shape</span><span class="p">(</span><span class="n">arg0</span><span class="p">),</span><span class="w"> </span><span class="p">&amp;</span><span class="w"></span>
<span class="w">        </span><span class="nb">shape</span><span class="p">(</span><span class="n">descriptor0</span><span class="p">%</span><span class="n">dims</span><span class="p">),</span><span class="w"> </span><span class="p">(</span><span class="o">/</span><span class="mi">0</span><span class="o">/</span><span class="p">))</span><span class="w"></span>
<span class="w">      </span><span class="n">descriptor0</span><span class="p">%</span><span class="k">data</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">c_loc</span><span class="p">(</span><span class="n">arg0</span><span class="p">(</span><span class="nb">lbound</span><span class="p">(</span><span class="n">arg0</span><span class="p">,</span><span class="w"> </span><span class="mi">1</span><span class="p">),</span><span class="nb">lbound</span><span class="p">(</span><span class="n">arg0</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">)))</span><span class="w"></span>

<span class="w">      </span><span class="k">call </span><span class="n">dummy_impl</span><span class="p">(</span><span class="n">descriptor0</span><span class="p">)</span><span class="w"></span>
<span class="w">    </span><span class="k">end subroutine</span>
<span class="k">end</span><span class="w"></span>
</pre></div>
</div>
<p>This allows to call the Fortran function <code class="docutils literal notranslate"><span class="pre">dummy</span></code> in a convenient way:</p>
<div class="highlight-fortran notranslate"><div class="highlight"><pre><span></span><span class="kt">integer</span><span class="p">(</span><span class="kt">c_int</span><span class="p">),</span><span class="w"> </span><span class="k">dimension</span><span class="p">(:,</span><span class="w"> </span><span class="p">:)</span><span class="w"> </span><span class="kd">::</span><span class="w"> </span><span class="n">some_array</span><span class="w"></span>
<span class="k">call </span><span class="n">dummy</span><span class="p">(</span><span class="n">some_array</span><span class="p">)</span><span class="w"></span>
</pre></div>
</div>
<p>The bindings will take care that the rank matches and it will infer the size of the array automatically.</p>
<p>All additional macros behave as mentioned above, namely <code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING_WITH_SIGNATURE_WRAPPED</span></code>,
and <code class="docutils literal notranslate"><span class="pre">BINDGEN_EXPORT_BINDING_GENERIC_WRAPPED</span></code>.</p>
<p>Data types need to be <cite>fortran_array_wrappable</cite> in order to be compatible with these macros. Natively, only
C arrays and <code class="docutils literal notranslate"><span class="pre">fortran_array_adapter</span></code> are <cite>fortran_array_wrappable</cite>. The latter is an adapter between
Fortran arrays and <cite>GridTools</cite> storages, such that the user can pass a Fortran array to a C++ function,
which then can be transformed into a <cite>GridTools</cite> storage.</p>
<div class="highlight-gridtools notranslate"><div class="highlight"><pre><span></span><span class="cp">#include</span><span class="w"> </span><span class="cpf">&lt;gridtools/storage/adapter/fortran_array_adapter.hpp&gt;</span><span class="cp"></span>

<span class="k">using</span><span class="w"> </span><span class="n">storage_info_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">storage_traits</span><span class="o">&lt;</span><span class="n">Backend</span><span class="o">&gt;::</span><span class="n">storage_info_t</span><span class="o">&lt;</span><span class="mi">0</span><span class="p">,</span><span class="w"> </span><span class="mi">3</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>
<span class="k">using</span><span class="w"> </span><span class="n">data_store_t</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">storage_traits</span><span class="o">&lt;</span><span class="n">Backend</span><span class="o">&gt;::</span><span class="n">data_store_t</span><span class="o">&lt;</span><span class="kt">double</span><span class="p">,</span><span class="w"> </span><span class="n">storage_info_t</span><span class="o">&gt;</span><span class="p">;</span><span class="w"></span>

<span class="kt">void</span><span class="w"> </span><span class="nf">modify_array_impl</span><span class="p">(</span><span class="n">fortran_array_adapter</span><span class="o">&lt;</span><span class="n">data_store_t</span><span class="o">&gt;</span><span class="w"> </span><span class="n">inout</span><span class="p">)</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
<span class="w">    </span><span class="n">data_store_t</span><span class="w"> </span><span class="n">data_store</span><span class="p">{</span><span class="n">storage_info_t</span><span class="p">{</span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">,</span><span class="w"> </span><span class="mi">10</span><span class="p">}};</span><span class="w"></span>
<span class="w">    </span><span class="n">transform</span><span class="p">(</span><span class="n">data_store</span><span class="p">,</span><span class="w"> </span><span class="n">inout</span><span class="p">);</span><span class="w"></span>

<span class="w">    </span><span class="c1">// use data_store</span>

<span class="w">    </span><span class="n">transform</span><span class="p">(</span><span class="n">inout</span><span class="p">,</span><span class="w"> </span><span class="n">data_store</span><span class="p">);</span><span class="w"></span>
<span class="p">}</span><span class="w"></span>
<span class="n">BINDGEN_EXPORT_BINDING_WRAPPED_1</span><span class="p">(</span><span class="n">modify_array</span><span class="p">,</span><span class="w"> </span><span class="n">modify_array_impl</span><span class="p">)</span><span class="w"></span>
</pre></div>
</div>
</section>
<section id="cmake-usage">
<h3>CMake usage<a class="headerlink" href="#cmake-usage" title="Permalink to this heading"></a></h3>
<p>A call to <code class="docutils literal notranslate"><span class="pre">bindgen_add_library</span></code> generates the libraries and the headers. By default,
the C header file and the Fortran file is written directly into the source tree. This choice was
taken to improve building in cross-build environments, because the process cannot rely
on generated binaries being executable on the host system. The output folder can be overwritten
by setting <code class="docutils literal notranslate"><span class="pre">FORTRAN_OUTPUT_DIR</span></code> and <code class="docutils literal notranslate"><span class="pre">C_OUTPUT_DIR</span></code>.</p>
<p>By default, the name of the generated Fortran module is set to the name of the library. A different
name can be set with <code class="docutils literal notranslate"><span class="pre">FORTRAN_MODULE_NAME</span></code>.</p>
</section>
</section>
</section>


           </div>
          </div>
          <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
        <a href="../getting_started/getting_started.html" class="btn btn-neutral float-left" title="Getting Started" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
        <a href="../glossary/glossary.html" class="btn btn-neutral float-right" title="Glossary" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
    </div>

  <hr/>

  <div role="contentinfo">
    <p>&#169; Copyright 2019, ETH Zurich.</p>
  </div>

  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
    <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
    provided by <a href="https://readthedocs.org">Read the Docs</a>.
   

</footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script> 

</body>
</html>