File: essJCGSv3.tex

package info (click to toggle)
ess 5.11-1
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 11,108 kB
  • ctags: 1,759
  • sloc: lisp: 18,021; sh: 1,544; asm: 862; makefile: 307; xml: 193
file content (1004 lines) | stat: -rw-r--r-- 49,037 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
\documentclass{article}

% Build switches.  \ifMM is created, set true, and then immediately reset
% to false on the next line, so the MM-specific branches below are off.
\newif\ifMM\MMtrue
\MMfalse
% \ifdraft selects between the draft layout and the final (submission) layout.
\newif\ifdraft
% During writing: a draft:
%\drafttrue
% FINAL:
% Draft mode is switched off here, but is forced back on whenever \ifMM is true.
\draftfalse \ifMM\drafttrue\fi

\ifdraft  %% generate tableofcontents down to the \paragraph
\setcounter{tocdepth}{5}
\fi

% Draft: reduce \topmargin by 1cm and add 1cm of text height.
% Final: set the line-spacing factor \baselinestretch to 1.5.
\ifdraft
 \addtolength{\topmargin}{-1cm}
 \addtolength{\textheight}{+1cm}
\else%FINAL:
 \renewcommand{\baselinestretch}{1.5}
\fi
% Adjustments applied in both modes: reduce \oddsidemargin by 0.5in,
% add 0.2in of text height and 1in of text width.
\addtolength{\oddsidemargin}{-0.5in}
\addtolength{\textheight}{0.2in}
\addtolength{\textwidth}{1in}
% MM builds get a further 2cm of text height.
\ifMM\addtolength{\textheight}{2cm}\fi

%%%
\usepackage[authoryear,round]{natbib}
%or (if you have an unshiny latex installation)
%\newcommand{\citep}[1]{{\{\sf#1\}}}
%%%
\usepackage{alltt}

%% Postscript fonts
\usepackage{times}
%\usepackage{mathptm}

\usepackage{graphicx}
\ifdraft%figs in place
\else% figures/tables at end for submission
\usepackage{endfloat}
\fi

% Choose \url support by engine: when \pdfoutput is undefined, define a
% plain \url that typesets its argument with \stexttt; otherwise load
% hyperref and configure it.
% \stexttt is defined further down in the preamble; that is fine because it
% is only expanded when \url is actually used.
% NOTE(review): current TeX distributions appear to define \pdfoutput even
% when producing DVI, in which case the first branch is never taken -- confirm.
\ifx\pdfoutput\undefined
  %% Stuff wout hyperref
  \def\url#1{\stexttt{#1}} % To help fit in lines ?AJR: stextsf?
\else
  %% Stuff with hyperref
  \usepackage{hyperref}
  %%\hypersetup{backref,colorlinks=true,pagebackref=true,hyperindex=true}
  \hypersetup{backref,colorlinks=false,pagebackref=true,hyperindex=true}
\fi
%%---End of package requiring ---------- Own Definitions -------------

% Superscript registered-trademark sign, used as in \SAS\regstrd.
% (\scriptsize is a declaration, so it takes no braced argument.)
\newcommand*{\regstrd}{$^{\mbox{\scriptsize\textregistered}}$}
% Superscript small-caps "tm" mark.  \normalfont\scshape is the LaTeX2e
% spelling of the obsolete \sc switch and produces the same output.
% A control word swallows the blank after it: write \tm\ when a space
% must follow the mark.
\newcommand*{\tm}{$^{\mbox{\scriptsize\normalfont\scshape tm}}$}
% Product and language names, set in small caps.
\newcommand*{\SAS}{\textsc{SAS}}
\newcommand*{\Splus}{\textsc{S-Plus}}
\newcommand*{\XLispStat}{\textsc{XLispStat}}
\newcommand*{\Stata}{\textsc{Stata}}
\newcommand*{\Rgui}{\textsc{Rgui}}
\newcommand*{\Perl}{\textsc{Perl}}
\newcommand*{\Fortran}{\textsc{Fortran}}
% \Scmt{text}: an unbreakable box holding a \qquad of space, a
% footnotesize ``##'' marker and the comment text in slanted type.
\newcommand*{\Scmt}[1]{\hbox{\qquad {\footnotesize \#\#} \textsl{#1}}}
% Definition and Example environments, numbered within each section.
\newtheorem{defn}{Definition}[section]
\newtheorem{ex}{Example}[section]

% \stexttt{text}: typewriter text at \small size.
\newcommand{\stexttt}[1]{{\small\texttt{#1}}}
% \ssf{text}: sans-serif text at \small size.  \normalfont\sffamily replaces
% the obsolete \sf switch and produces the same output.
\newcommand{\ssf}[1]{{\small\normalfont\sffamily{#1}}}
% \elcode{code}: code on a line of its own, indented by 2em; the
% surrounding \\ force the line breaks before and after it.
\newcommand{\elcode}[1]{\\{\stexttt{\hspace*{2em} #1}}\\}
% \file{name}: a file name in small typewriter type between single quotes.
\newcommand{\file}[1]{`\stexttt{#1}'}
% Character at octal position 137 of the current font.
\newcommand{\US}{{\char'137}}        % \tt _
% \marpar{text}: margin note set ragged-right.
\newcommand{\marpar}[1]{\marginpar{\raggedright#1}}
% Salltt: an alltt environment typeset at \small size.
\newenvironment{Salltt}{\small\begin{alltt}}{\end{alltt}}

% \emptyfig: rule-drawn placeholder that \ESSfig typesets instead of the
% real figure content.
% First line: a 324pt-wide, .25pt-thick horizontal rule indented by 42pt.
% Second line: a 10pc-tall vertical rule, a 316pt-wide horizontal rule and
% a second 10pc-tall vertical rule, again indented by 42pt.
% The line ends inside the definition are not masked with %, so they put
% space tokens into the macro body.
% NOTE(review): those spaces presumably account for the 316pt vs 324pt
% widths -- confirm before tidying the definition.
\newcommand{\emptyfig}{
\hspace*{42pt}\rule{324pt}{.25pt}\\
\hspace*{42pt}\rule{.25pt}{10pc}
\rule{316pt}{.25pt}
\rule{.25pt}{10pc}}

% \ESSfig{content}: centred figure body.
% The variant is chosen once, here, from the state of \ifMM:
%   MM build  -- always typeset the content;
%   otherwise -- typeset the \emptyfig placeholder in draft mode and the
%                content in final mode (\ifdraft is tested at each use).
\ifMM
  \newcommand{\ESSfig}[1]{\centering{#1}}
\else
  \newcommand{\ESSfig}[1]{%
    \centering
    \ifdraft
      \emptyfig
    \else
      {#1}%
    \fi}
\fi

%% Use \begin{Comment} .. \end{Comment} for internal comments
\ifdraft
  % Draft: show internal remarks as a small italic quotation.
  \newenvironment{Comment}%
    {\begin{quote}\small\itshape }%
    {\end{quote}}
\else
  % Final: make Comment an alias of the comment environment from the
  % verbatim package, which is loaded here for that purpose.
  \usepackage{verbatim}
  \let\Comment\comment
  \let\endComment\endcomment
\fi


%%--------------------------------------------------------------- Start Text

\title{Emacs Speaks Statistics: A multi-platform, multi-package
  development environment for statistical analysis}

%For blinded submission:
\author{anonymous}

%%For regular review:
%\author{A.J. Rossini \and Richard M. Heiberger \and Rodney A. Sparapani
%\and Martin M{\"a}chler \and Kurt Hornik \footnote{%
%%%
%    A.J. Rossini is Research Assistant Professor in the Department of
%    Biostatistics, University of Washington and Joint Assistant Member at
%    the Fred Hutchinson Cancer Research Center, Seattle, WA, USA
%    (E-mail: rossini@u.washington.edu);
%%%
%    Richard M. Heiberger is Professor in the Department of Statistics at
%    Temple University, Philadelphia, PA, USA (E-mail: rmh@temple.edu);
%%%
%    Rodney A. Sparapani is Senior Biostatistician in the Center for Patient
%    Care and Outcomes Research at the Medical College of Wisconsin,
%    Milwaukee, WI, USA (E-mail: rsparapa@mcw.edu);
%%%
%    Martin M{\"a}chler is Senior Scientist and Lecturer in the Seminar for
%    Statistics, ETH Zurich, Zurich, Switzerland
%    (E-mail: maechler@stat.math.ethz.ch);
%%%
%    Kurt Hornik is Professor in the Institut f{\"u}r Statistik,
%    Wirtschaftsuniversit{\"a}t Wien and the Institut f{\"u}r
%    Wahrscheinlichkeitstheorie und Statistik, Technische
%    Universit{\"a}t Wien, Vienna, Austria (E-mail:
%    Kurt.Hornik@r-project.org).  The authors would like to thank the
%    referees and associate editor for suggesting changes which
%    substantially improved the presentation of the paper.}}

\date{\today}
%%\date{$ $Date: 2002/10/07 16:33:27 $ - $Revision: 1.47 $ $\tiny printed \today}

\begin{document}

%%\ifpdf
%%  \DeclareGraphicsExtensions{.jpg,.pdf,.png,.mps}
%%\fi
%%%% To cite everything
%%\nocite{*}

% Draft only: set the page counter to 0 and emit the table of contents
% ahead of the title.
\ifdraft
\setcounter{page}{0}
%%\newpage
\tableofcontents
\fi

\maketitle

% Final only: set \baselinestretch to 1.5 again (the same value the
% preamble sets in its final-mode branch); nothing is done in draft mode.
\ifdraft{}%% large line skip -- not for draft
\else%FINAL:
 \renewcommand{\baselinestretch}{1.5}
 %%- \baselineskip=2pc
\fi

\begin{abstract}
  Computer programming is an important component of statistics
  research and data analysis.  It is a necessary skill for using
  sophisticated statistical packages and for writing custom scripts
  and software to perform data analysis using modern statistical
  methods.  Emacs Speaks Statistics (ESS) provides an intelligent and
  consistent interface between the user and statistics software.  ESS
  interfaces with \SAS, \Splus, R, and other statistics packages under
  the Unix, Microsoft Windows, and Apple Mac operating systems.  ESS
  extends the Emacs text editor to streamline the use and creation of
  statistical software.  ESS understands the syntax for numerous data
  analysis languages, provides consistent display and editing features
  across packages, and assists in the interactive or batch execution
  of statements by statistics packages.  We describe in detail the
  features which ESS provides for increasing statistical programming
  efficiency.
\end{abstract}

\noindent Keywords: Data Analysis, Programming, S, \SAS, \Splus, R,
\XLispStat, \Stata, BUGS, Open Source Software, Cross-platform User
Interface.

\section{Introduction}
\label{sec:introduction}

Statistical research activities, particularly data analysis and
communication, involve computing.  This is exemplified by the idea
that many daily statistical activities can be considered to be
\textit{programming with data} \citep{ChaJ98}.  The user interface,
which maps user behaviors into instructions to the computer, plays a central
role in facilitating these tasks.  A familiar, coherent, and
well-understood set of input behaviors can increase the efficiency of
statistical practice.  This paper introduces Emacs Speaks Statistics
(ESS) \citep{ESS}, a software package built upon the Emacs text editor
which provides a common interface to a variety of statistical packages
on the most common computing platforms.

%% AE's comments:

%        many statistical systems can only be used or are best used
%        through a textual interface.

%        textual interfaces can be hard to use; ESS is a tool for
%        making them easier to use.

%        preparing textual input for submission to a system is a form
%        of programming.

%        repeating analyses with minor variations and designing new
%        forms of analyses, perhaps using statistical languages, are
%        more advanced forms of programming

%        ESS can help the user be more effective with all forms of data
%        analysis programming.

Statistical package interfaces can generally be placed into two
categories.  The first and older approach is to provide a command-line
or batch interface.  It has been claimed that these textual
interfaces, which provide extensive control over the data analysis
procedures being performed, are probably the best interfaces for
auditable research and can facilitate reproducible research
\citep{claer:1997}.  The second approach is to provide a graphical
user interface (GUI); this approach either partially or completely
replaces the command-line with the use of toolbars and menus along
with dialog boxes for performing statistical procedures.  Data display
is provided through the use of a spreadsheet for entry and
modification.  Some GUI-based packages can also accept
scripts as input for regenerating statistical analyses.  ESS provides
a middle ground, with a focus on writing data analysis scripts and
programming code for statistical computing, while providing tools,
sometimes with associated GUI tools such as menus, toolbars, and
dialogs, for speeding up the associated programming tasks.

ESS is an interface to statistical packages that provides tools which
facilitate both statistical software development and data analysis.
ESS provides assistance with both the writing and evaluation of
analysis code for many types of statistical packages.  ESS currently
supports the S family of languages, including S
\citep{BecRCW88,ChaJH92,ChaJ98}, \Splus\regstrd\ \citep{Splus}, and R
\citep{ihak:gent:1996,R}; \SAS\regstrd\ \citep{SAS:8}; \Stata\
\citep{Stata:7.0}; \XLispStat\ \citep{Tier90} and its extensions Arc
\citep{Cook:Weisberg:1999} and ViSta \citep{youn:fald:mcfa:1992}; BUGS
\citep{BUGS}; and Omegahat \citep{DTLang:2000}.  ESS can be extended
to accommodate most statistical packages which provide either an
interactive command-line prompt for inputting instructions, or process
batch files for instructions.

We discuss how ESS enhances a statistician's daily activities by
presenting its features and showing how it facilitates statistical
computing.  Next, we describe the Emacs text editor, the underlying
platform on which ESS is built.  We conclude with a short history of
the development of ESS and goals for the future.

\section{ESS and Statistical Practice}
\label{sec:ess-extends-emacs}

Statistical programming is the writing of data analysis scripts or
general computer programs for data analysis and processing.  Both
forms will be referred to as programs and source code (the textual
result) or programming (the activity).  Although these programs could
be written in a general computer language such as \Fortran, C/C$++$,
or Java, it usually makes more sense for the data analyst to work
in specialized scripting languages that support common statistical
procedures.  The statistical languages (for example \Splus, R, \SAS,
\Stata, and \XLispStat) usually do not include the same range of
sophisticated programming tools for writing and debugging code as the
general purpose languages.

ESS extends the Emacs text editor to provide a development environment
for statistical programming languages. In particular, ESS provides
many features which enhance the construction of data analysis scripts
and statistical programming.  The result is an environment with
features targeting the complementary goals of statistical programming
and data analysis.  It offers a single interface for a variety of
statistical computing tasks including interactive data analysis and
statistical programming.  ESS is able to provide a functional and
extensible interface which is uniform and consistent across multiple
statistical packages.  This is done by adding shortcuts and features
for accelerated editing of files as well as by interacting with the
particular statistical packages to provide, for example, control of
input/output, assistance with evaluation, and specialized display for
viewing, navigating, and editing of help and documentation files.

Some features do not require direct interaction with a statistics
package; these include textual script editing features such as
syntactic fontification and colorization of scripts, detection of
balanced and unbalanced parentheses and braces through highlighting,
code indentation for readability, commenting out regions, and indexing
of files by function.  The more interesting features, such as help
file parsing and viewing, object and function name completion,
evaluation of scripts, and interactive editing of data and objects,
require some interaction with the target statistical package either by
controlling interactive behavior or by batch processing.

\subsection{Editing}
\label{sec:ESS:features}

\paragraph{Source Code Formatting and Display}
The task of programming is made easier when language constructs (such
as reserved words, function calls, strings, and comments) are visually
identifiable and when lines of code are automatically indented to a
depth appropriate to their context (e.g., if--then clauses, loops).
ESS provides both of these to the programmer by including a
description of the syntax of each supported statistical language in
the form used by \stexttt{font-lock-mode} (for a brief discussion of
Emacs modes and font-lock-mode, see Section~\ref{sec:emacs}).  The
font selection and the indentation depth are automatically set while
the user is typing.  There are several options for mapping of colors
or fonts to each of the syntactic types.  We selected black-and-white
font-mapping for display here.  On a color terminal we might use
purple for the keywords, red for comments, green for matching parens,
and inverse-video purple for mismatched parens.  Emacs makes default
choices of colors and ESS provides several other optional schemes.

%Figure \ref{fig:font} shows an example of font-locking a complicated S
%statement.  The top panel shows an \stexttt{if} statement with a long
%expression in the condition and a multi-line consequence.  The keyword
%\stexttt{if} is shown in purple, the string \stexttt{"deltat"} in
%RosyBrown.  The comments are in red.  Everything else is in the
%standard font.  The consequence is indented and the continuations of
%the consequence are further indented.  The matching parentheses are
%shown in green.  The cursor is indicated by a solid box.  In the
%bottom panel, we replaced the matching parenthesis with an unbalanced
%bracket.  Emacs immediately marks that with the paren-mismatch font,
%bright purple in this example.  On a black and white terminal we would
%use bold, underline, italic, and reverse-video, rather than colors, to
%distinguish the fonts.

Figure~\ref{fig:font} shows a black-and-white example of font-locking a
complicated S statement.  The top panel shows an \stexttt{if}
statement with a long expression in the condition and a multi-line
consequence.  The keyword \stexttt{if} is shown in an underlined font,
the string \stexttt{"deltat"} in an italic underlined font.  The
comments are in an italic font.  Everything else is in the standard
font.  The consequence is indented and the continuations of the
consequence are further indented.  The matching parentheses are marked
by a bold foreground and a shaded background.  The cursor is indicated
by a solid box.  In the bottom panel we replaced the matching
parenthesis with an unbalanced bracket.  Emacs immediately marks that
with the paren-mismatch font, bright purple on a color terminal.


%\begin{figure}[tbp]%h
%  \ESSfig{%
%    \includegraphics[angle=270,width=\textwidth]{font-cor-s}
%    \includegraphics[angle=270,width=\textwidth]{font-incor-s}%
%    }
%  \caption{We illustrate here with fonts and colors appropriate for a
%    color display.  On a black and white terminal we would use bold,
%    underline, italic, and reverse-video.  On a color terminal we
%    would use a selection of colors.}
%  \label{fig:font}
%\end{figure}
\begin{figure}[tbp]%h
  \ESSfig{%
    \includegraphics[angle=270,width=\textwidth]{fig1-top-bw}
    \includegraphics[angle=270,width=\textwidth]{fig1-bottom-bw}
  }
  \caption{Emacs detects mismatched parentheses.  The top panel
    shows matching parentheses highlighted in the same color.  The
    bottom panel shows mismatched parentheses, a left parenthesis and
    a right brace, highlighted in an attention grabbing color.  In
    this black and white rendition, the mismatch is in reverse video.
    On a color display, the mismatch is in bright purple.  }
  \label{fig:font}
\end{figure}

ESS uses the Emacs tools for reformatting code to match particular
styles.  For S, both common C format styles and locally customized
styles can be used to define the indentation level for nested
statements, location of open-braces (at the end or at the beginning of
a line), indentation offsets for if-then-else constructs, and similar
characteristics.  Functions exist to reformat blocks of code to match
the desired style.  Similar functions exist for \XLispStat, though good
Lisp programming style is better defined and hence more restrictive.

Syntax highlighting can be used to enforce coding standards.
Figure~\ref{fig:hilock} illustrates a standard for \SAS\ programming, but
aids for enforcing coding standards could be implemented for other
languages such as R or \Splus.
%that says all \stexttt{PROC} statements must use the
%\stexttt{DATA=datasetname} option.

%\begin{figure}[tbp]
%  \ESSfig{\includegraphics[angle=270,width=\textwidth]{hilock-sas}}
%  \caption{Enforce coding standards.  The standard here is
%    that all \stexttt{PROC} statements must use the
%    \stexttt{DATA=datasetname} option.  Lines that satisfy the
%    standard turn green, lines that don't turn red.
%    Ambiguous ones turn yellow.}
%  \label{fig:hilock}
%\end{figure}

\begin{figure}[tbp]
  \ESSfig{\includegraphics[angle=270,width=\textwidth]{hilock-sas-bw}}
  \caption{Enforce coding standards.  The standard here is
    that all \stexttt{PROC} statements must use the
    \stexttt{DATA=datasetname} option.  Lines that satisfy the
    standard are given a shaded background (green on a color screen),
    lines that don't are displayed in inverse video (red on a color
    screen).  Ambiguous ones are displayed in a lighter shade of
    inverse video (yellow on a color screen).}
  \label{fig:hilock}
\end{figure}

\paragraph{Help: Display, Navigation and Editing.}
ESS provides display and navigation tools for S (R and \Splus) and
\Stata\ help displays.  Displays of help files are done in a separate
buffer.  These buffers include single-key bookmarks for jumping to the
main sections of the help file, such as the function arguments or
examples sections.  Code in the examples section can easily be sent to
a running S process. % also with single key ``l''

In addition, ESS provides an R documentation mode (\stexttt{Rd-mode})
which assists in writing help files for R functions, objects, and
other topics.  \stexttt{Rd-mode} provides the ability to view and
execute code embedded in the help file in the same manner as ESS
handles code from any S language source file.  It provides syntax
highlighting and the ability to submit code directly to a running ESS
process, either R or \Splus, for evaluation and debugging.

\subsection{Interactive Processing}
\label{sec:interactive}

The increased popularity of exploratory data analysis as well as the
advent of simple GUIs has made interactive data analysis an important
component of statistical practice.  ESS uses three different
approaches for communicating with statistical packages: inferior
process control, which entails redirection of text input and output;
peer-to-peer style communication, which is currently accomplished on
MS Windows using DDE; and batch submission of whole or partial text
files containing analysis scripts.  For packages which do not support
any of these, the primary use of ESS is as an editing tool, with
interaction done using windowing cut-and-paste techniques; this still
provides beneficial editing features, some of which extend beyond
native editing environments.  Examples of this last situation include
Windows versions of \SAS, \Stata, and \XLispStat.

Emacs has historically referred to processes under its control as
\textit{inferior}, accounting for the name inferior ESS
(\stexttt{iESS}) to denote the mode for interfacing with the
statistical package.  Figure~\ref{fig:ess-demo} shows the S language
program \stexttt{ess-demo.s} in the top buffer in \stexttt{ESS[S]}
mode and the executing R process in the bottom buffer \stexttt{*R*}.
The \stexttt{iESS} major mode of the \stexttt{*R*} buffer is crafted
for command-line editing.  This mode remembers and uses the command
history, allowing for the recall and searching of previously entered
commands.  Filename completion for local directories is also
available.

%\begin{figure}[tb]
%  \ESSfig{\includegraphics[angle=270,width=\textwidth]{ess-demo}}
%  \caption{Line-by-line execution of a command file. The cursor is
%    placed on a line in the \stexttt{ESS[S]} buffer and then with a single
%    key sequence
%    the line is sent to the \stexttt{*R*} buffer for
%    execution.  The output of the package goes directly to the
%    editable \stexttt{*R*} buffer.}
%  \label{fig:ess-demo}
%\end{figure}

\begin{figure}[tb]
  \ESSfig{\includegraphics[angle=270,width=\textwidth]{ess-demo-bw}}
  \caption{Line-by-line execution of a command file. The cursor is
    placed on a line in the \stexttt{ESS[S]} buffer and then with a
    single key sequence, the line is sent to the \stexttt{*R*} buffer
    for execution.  The output of the package goes directly to the
    editable \stexttt{*R*} buffer.}
  \label{fig:ess-demo}
\end{figure}

\paragraph{Source-level Debugging.}
ESS facilitates the editing of source code files (sets of commands
written for a statistical analysis package) and allows the user to
load small sections of source code into the package and error-check them.
This is done through several mechanisms.  First, the presence of
unbalanced parentheses or mismatched/unterminated quotes is
immediately evident with syntactic highlighting of the source code.
Second, functions are provided for simple and consistent execution of
user-specified or natural units of the code (function definitions in S
or \XLispStat, \stexttt{PROC \dots\ RUN;} sections in \SAS).  An
error-free evaluation lets the user execute the next section of code;
if errors arise, the user edits the current unit and re-evaluates.
Once the code is verified, an entire buffer, or file, of code can be
sent to the package as a unit.  This file can also be used as a batch
file for routine analysis at a later time.  Finally, output from the
statistics package is normally captured directly by Emacs and placed
into a buffer from where it can be edited and searched.  Particular
forms of output such as requests for help pages and log-file output
can be diverted into special buffers with modes crafted to facilitate
reading.  These modes include tools for automatically placing the
cursor on the first \stexttt{ERROR}, for example in \SAS\ and S.

\paragraph{Interactive transcripts.}
A transcript records all commands entered by the analyst and the
corresponding text-based responses such as tables and comments
generated by the statistics package during an interactive statistical
analysis session.  Once a transcript file is generated, for example by
saving an \stexttt{iESS} buffer, \stexttt{transcript-mode} assists
with reuse of part or all of the entered commands.  ESS understands
the transcript's syntax, especially the potential prompt patterns used
during the interactive analysis.  ESS provides tools to facilitate
editing and re-evaluating the commands directly from the saved
transcript.  This is useful both for demonstration of techniques and
for reconstruction and auditing of data analyses.  Special ESS
functions can ``clean'' S language transcripts by isolating all input
lines and placing them in a new S language source file.  Transcript
cleaning facilitates the use of an exploratory interactive analysis
session to construct functions and batch files for routine analysis of
similar data sets.

\paragraph{Cooperation across Multiple Tools.}
Statistical packages are intended for either general or specialized
forms of statistical analyses.  The specialized statistical packages
can be far more efficient for their intended activities, but this is
balanced by their inability to perform a wide range of general
statistical functions.  Tightly coupled inter-operability between
general and specialized packages rarely exists, but such a facility is
often desired.  For example, a general purpose package such as R does
not perform Bayesian analyses as easily as BUGS does.  On the other
hand, BUGS lacks breadth in the range of analyses and results it can
generate.  For this reason, BUGS is often distributed with R packages,
like the diagnostic packages CODA and BOA, which assist with importing
and analyzing the results in R.  Another point of contention is the
difference in the interfaces between general packages and specialized
packages.  ESS helps by providing a single point of contact to both
tools, though the typical interfaces (interactive for R, batch for
BUGS) can be different.

\paragraph{Concurrent Use of Multiple Machines and Operating Systems.}
It can be useful to have multiple statistical processes running
simultaneously, either on a single machine or a variety of machines.
This capability assists with large-scale numerical simulations as well
as code design and testing across multiple versions of statistical
software packages.

ESS provides transparent facilities for editing files and running
programs which might reside on numerous remote machines during the
same session.  The remote machine could be a different platform than
the local machine.  This is accomplished through the use of Emacs
capabilities for transparent access to remote files over a network.
This means that the user views, edits, and saves files on a remote
machine exactly as if they were on the local machine.

This relaxes the requirement that statistics programs be available on
the local machine.  ESS provides both transparent editing of files and
execution of statistics packages on a remote machine with
\stexttt{iESS[S]} or \stexttt{iESS[SAS]}.  All the editing and
interaction features described for the local machine work equally well
on the remote machine.  The interaction, including all the unique
features of working with ESS, appears to the user as if the program
were running on the local machine.  If the X11\tm\ Windowing system is
running on the local machine, it is even possible to bring up visual
displays and graphics from remote Unix systems onto a local display.

\paragraph{Interactive S family.}
ESS for S family statistical languages, \stexttt{iESS[S]}, replaces
the \Splus\ Commands window or the R GUI window.  In addition to
running the S family language process, \stexttt{iESS[S]} mode provides
the same editing features, including syntactic highlighting and
string-search, as the editing mode \stexttt{ESS[S]}.  It also provides
an interactive history mechanism; transcript recording and editing;
and the ability to re-submit the contents of a multi-line command to
the executing process with a single keystroke.  \stexttt{iESS[S]} is
used with S, \Splus, and R on Unix and with Sqpe and R on Windows.

The \Splus\ GUI on Windows can be used as a DDE server.  There are two
advantages to using even this limited communication with the \Splus\
GUI through ESS.  First, through \stexttt{ESS[S]} mode the user gets
the full editing capabilities of Emacs.  Second, S language commands
are sent from the editing mode \stexttt{ESS[S]} buffer and from
transcript buffers from previous S sessions directly to the GUI
Commands window with the same Emacs key sequences as are used with ESS
on Unix.  Hence the user can work in a powerful editing environment
and is protected from the delay and ergonomic challenges of using the
mouse for copy and paste operations across windows.

For languages in the S family, ESS provides object-name completion of
both user- and system-defined functions and data.  ESS can dump and
save objects (user- and system-generated) into formatted text files,
and reload them (possibly after editing).

\paragraph{Interactive \SAS.}
\stexttt{iESS[SAS]} is a mode that allows text-based \stexttt{PROC} by
\stexttt{PROC} interaction with an inferior buffer running an
interactive \SAS\ session on either the local or a remote computer.
\stexttt{iESS[SAS]} mode works by redirecting standard input and
output from \SAS\ to ESS.  Currently, the \stexttt{iESS[SAS]} mode can
run on any computer, but the \SAS\ process it is controlling must be
running on a Unix machine.  This process is very efficient for dial-up
network connections to a remote computer with \SAS\ installed.  The
resulting interface is similar to the \SAS\ character terminal
interface, but with Emacs key sequences.

\paragraph{Interactive BUGS.}
BUGS software performs Markov Chain Monte Carlo integration.  ESS
supports interactive processing of BUGS commands.

\subsection{Batch File Processing}
\label{sec:batch-file}

Batch file processing with statistical analysis packages is a better
choice than interactive processing when execution times are longer
than the user is willing to wait, and when statistical reports and
figures must be regenerated regularly.  ESS provides a means to shorten the
debugging cycle for writing code intended for batch evaluation by
containing the whole process, both writing and evaluation, within
Emacs.

\paragraph{Batch \SAS.}
\label{sec:sas-batch}

\SAS\ is a popular choice for processing and analyzing large amounts
of data.  Interactive \SAS\ is rarely used in these
situations due to the length of time involved.  Instead, a file
containing \SAS\ commands is created and \SAS\ executes these commands
in the background, or batch, while the user moves on to other
activities.

ESS facilitates \SAS\ batch with \stexttt{ESS[SAS]}, the mode for
files with the \stexttt{sas} extension.  ESS defines \SAS\ syntax so
that \stexttt{font-lock-mode} can highlight statements, procedures,
functions, macros, datasets, comments and character string literals in
\SAS\ programs.  Optionally, the same language features are
highlighted in the \SAS\ log with the addition of log notes, warnings
and error messages.

For files with the \stexttt{sas} extension, ESS binds the function
keys in \stexttt{ESS[SAS]} mode to match the definitions used by \SAS\
Display Manager.  These definitions are optionally available in all
modes.  They are particularly useful when viewing \SAS\ log and
listing files (with extensions of \stexttt{log} and \stexttt{lst}
respectively).

Only one function key press is needed to submit a \SAS\ batch process.
Other function keys open the \SAS\ program file, the log buffer, and
the listing buffers.  When accessed in this manner, the \SAS\ log and
listing buffers are automatically updated since they may have
been appended to or overwritten by the \SAS\ batch process.  In
addition, the \SAS\ log is searched for error messages and the error
messages, if any, are sequentially displayed with consecutive key
presses.

Another function key opens a \SAS\ permanent dataset for editing or
viewing.  An option is provided so that the tab and return keys
operate in typewriter fashion as they do in \SAS\ Display Manager.
This option also defines a key to move the cursor to a previous
tab-stop and delete any characters between its present position and
the tab-stop.  This is a \SAS\ Display Manager feature that is not
typically available in Emacs.

The \SAS\ batch process runs on the computer where the \SAS\ program
resides.  This is important because any \SAS\ permanent datasets
referenced in a \SAS\ program only exist on the computer running \SAS.
If the \SAS\ program resides on a remote computer, then the log and
listing are also accessed remotely.  The net result is that running
\SAS\ batch on remote computers is nearly transparent to the ESS user.

\paragraph{Batch BUGS.}
The BUGS interactive capability is not often used since MCMC analyses
can be very time-consuming; hence, most BUGS programs are executed as
batch processes.  ESS facilitates BUGS batch with \stexttt{ESS[BUGS]},
the mode for files with the \stexttt{bug} extension.  ESS provides four
features.  First, BUGS syntax is described to allow for proper
fontification of statements, distributions, functions, commands and
comments in BUGS model files, command files and log files.  Second,
ESS creates templates for the command file from the model file so that
a BUGS batch process can be defined by a single file.  Third, ESS
provides a BUGS batch script that allows ESS to set BUGS batch
parameters.  Finally, key sequences are defined to create a command
file and submit a BUGS batch process.

\paragraph{Batch S.}
ESS provides two facilities for batch processing of S family language
files.  The first is to execute the contents of a file using
buffer-evaluation.  This differs from interactive processing only by
the number of commands being evaluated; errors can be found by
examining the resulting transcript.  The second is the load-source
mechanism, which provides a means of jumping to errors in the source
file, but does not display the evaluated commands in the transcript.
These mechanisms provide different tools for debugging the source
files.

\section{Emacs, the Basis for ESS}
\label{sec:emacs}

Emacs is a mature, powerful, and extensible text editing system which
is freely available, under the GNU General Public License (GPL), for a
large number of platforms, including most Unix\regstrd\ distributions,
Microsoft Windows\regstrd\ and Apple Mac\tm\ OS.  There are two
open-source implementations of Emacs: GNU Emacs \citep{GNU-Emacs} and
XEmacs \citep{XEmacs}.  Emacs shares many features with word
processors, and some characteristics with operating systems, including
many facilities which go beyond ordinary text editing.  More important
to our goals, Emacs can control and interact with other programs.  We
quickly describe features which enable ESS.  These are not necessarily
unique to Emacs, but it was the first extensible editor to have them
all available.

\paragraph{Keyboard and Mouse Input.}

When Emacs was originally written, character-based terminals were the
most advanced method of computer access.  Common Emacs commands were
mapped to key sequences, creating keyboard shortcuts for convenience.
Over the last decade, Emacs has been extended to use graphical
windowing systems, such as X11, Microsoft Windows, and Apple Mac
OS, which allow additional forms of input, for example using a mouse,
and which encourage multiple applications to share a single display.
Presently, Emacs is more often used with a GUI, with commands bound to
mouse actions, but having commands also associated with key sequences
is an important ergonomic and time-saving feature.  Emacs menus and
toolbars on the display screen allow mouse access to frequently used
actions and provide a graphical alternative when the user does not
know or cannot recall a key sequence; these are also subject to
user-customization.

\paragraph{Buffers give Emacs control.}
Emacs buffers are the interface between the user and computer.  They
can be considered to be a collection of scratch pads that both the
user and computer can read, write, and respond to.  The user can
simultaneously edit many files and control numerous programs by
opening multiple buffers.  With disk files, the working copy of the
opened file is placed in an Emacs buffer where it can be viewed and
edited either by the user or automatically by Emacs or another program
under the control of Emacs.  Emacs can save a backup of the contents
to disk at specified intervals.  Emacs presents buffer contents in
ways which optimize reading and navigation activities.  One example of
program control is the embedding of the interactive operating system
command line interpreter, called a shell, within Emacs.  Variations on
this theme are used to control programs such as statistical packages
which take input from and provide output to the command-line.  The
resulting buffers provide a copy of the entire transcript of the
interaction, which can be edited and searched while the program
executes.

\paragraph{Major and Minor Modes.}
Emacs capabilities are extended by loading files containing commands
and functions written in Emacs Lisp (elisp) \citep{RChassell1999},
which is a dialect of Lisp \citep{PGraham:1996}.  Emacs commands can
be called interactively by pressing a key sequence mapped to the
command or by name.  The most important extensions to Emacs take the
form of modes, which provide significant and specific enhancements to
the user interface behavior.  For
example, \stexttt{font-lock-mode} allows Emacs to highlight, with
fonts or colors, the syntax of a programming language whose
characteristics are described within a major mode like
\stexttt{ESS[S]}.  A full description of the role that
modes play is beyond the scope of this paper; there is documentation
built into Emacs describing the range of modes, how they work,
and how to implement them.  The critical point is that this flexible
and extendable facility forms the basis for ESS's implementation.

%Major modes provide a customized environment consisting of mapped key
%sequences and associated commands for performing tasks such as file
%editing, reading mail, or browsing disk directories.  Only one major
%mode can be active for a given buffer at any time.  Major modes also
%can be written to intelligently control other programs such as
%statistics packages.  Major modes for file editing are often
%determined by the file type or extension, i.e.  the characters at the
%end of the file name that follow a period like \stexttt{txt},
%\stexttt{s}, or \stexttt{sas}.  Examples of this kind of major mode
%are \stexttt{ESS[S]} and \stexttt{ESS[SAS]}.  Major modes understand a
%file's syntax and grammar and therefore provide intelligent actions
%such as automatic indentation; navigation in units of characters,
%words, lines, sentences, paragraphs, function definitions, and pages;
%syntax-based fontification and colorization; and reformatting based on
%programmed conventions.

%Minor modes provide complementary services that are applicable
%across major modes.  Many minor modes can be active at once.  For
%example, \stexttt{font-lock-mode} allows Emacs to highlight, with
%fonts or colors, the syntax of a programming language whose
%characteristics are described within a major mode like
%\stexttt{ESS[S]}.  The \stexttt{overwrite-mode} determines whether
%typed characters replace the existing text or are inserted at the
%cursor.  Minor modes can emulate the key sequences used by another
%editor such as \stexttt{vi}.  In addition, they can be used to perform
%version control operations and many other operations which are nearly
%identical across file types.


\paragraph{Editing Extensions.}
Most programming and documentation tasks consist of editing text.
These tasks can be enhanced by contextual highlighting and recognition
of special reserved words appropriate to the programming language in
use.  In addition, Emacs supports folding, outlining, tags, and
bookmarks, all of which assist with maneuvering around a file.  Emacs
shares many features with word processing programs and cooperates with
markup-language document preparation systems such as \LaTeX,
\textsc{html}, or \textsc{xml}.

Tracking changes to a text file made by multiple users, potentially in
different locations, is the job of source-code control programs.
Emacs interacts with standard source-code control programs such as
CVS, RCS, and SCCS through minor modes such as \stexttt{vc-mode}.
These source-code control systems facilitate documenting and tracking
edits and changes to a file.  More importantly, they allow for
branching and merging of versions so that material present in an older
version of the file can be recovered and inserted into a newer version
in a fairly easy manner.  This can be important in tracking down the
actual source file used to perform a particular prior data analysis.

Comparison of files, two or three drafts of a paper for example, is
simplified by \stexttt{ediff}.  An example is shown in
Figure~\ref{fig:ediff}.  The lines that are similar are highlighted in the two
buffers, one for each file, and the specific words that mismatch are
highlighted in a contrasting color.  \stexttt{ediff} has many tools
for working with the differences in files and in entire directories.
When combined with the \stexttt{patch} utility or a source-code control system,
it provides the user with the ability to insert, delete or modify only
the differing portions of text files.  This can be critical for the
data analyst who has two or more partial versions of an analysis in separate
files which need to be appropriately merged.
%This can be critical for the
%data analyst, where 2 different versions of the same data analysis
%each contain sections which are accurate, but need to be appropriately
%merged to document the final correct analysis.
%\begin{figure}[tbp]
%  \ESSfig{\includegraphics[angle=270,width=\textwidth]{ediff-sas}}
%  \caption{Ediff of two versions of a file.}
%  \label{fig:ediff}
%\end{figure}
\begin{figure}[tbp]
  \ESSfig{\includegraphics[angle=270,width=\textwidth]{ediff-sas-bw}}
  \caption{Ediff of two versions of a file.  Similar lines in the two files
are detected.  The entire line is highlighted in each file and the differences
are highlighted in a contrasting color.}
  \label{fig:ediff}
\end{figure}

Emacs has many other important features.  Emacs provides file-manager
capabilities, such as \stexttt{dired} (\textit{directory editor}) and
\stexttt{speedbar} (an index into the structure of a file), both of
which interface to the computer's directory structure.  Emacs stores
the complete history of commands issued in an editing session,
allowing a flexible and fairly complete undo capability.  More
importantly, for modes which control processes, the process input
history is stored for recall as well as for later editing for printing
or re-use.  Emacs also includes web browsers, mail/newsgroup readers,
and spell checking.

Mechanisms for both open (\stexttt{ange-ftp} and \stexttt{EFS} use
ftp) and secure (\stexttt{tramp} uses scp or ssh) remote file access
are available.  Emacs can also monitor and control remote processes
running in a shell buffer.

In addition to being an extremely powerful editor, Emacs also includes
capabilities usually found in an operating system.  Thus, it provides
a strong foundation for constructing an integrated development
environment focused on the needs of statisticians.  Emacs' power,
flexibility, portability, and extensibility make it a solid platform
on which to construct a statistical analysis user interface.

\section{History of ESS}
\label{sec:ESS:history}

ESS would never have existed without GNU Emacs, the editing system for
which Richard Stallman won a MacArthur Foundation Fellowship in 1990.
Emacs is one of the oldest popular editors and has a long history of
being a programmer's editor.  Many statisticians got their first taste
of the power of Emacs with \Fortran\ mode which was introduced in
1986.  As statisticians' preferences changed from general-purpose
languages such as \Fortran\ to specialized statistical analysis
languages and packages like S and \SAS, Emacs modes soon followed.

The ESS environment is built on the open-source projects of many
contributors, dating back over 10 years.  Doug Bates and Ed Kademan
wrote S-mode in 1989 to edit S and \Splus\ files in GNU Emacs.  Frank
Ritter and Mike Meyer added features, creating version 2.  Meyer and
David Smith made further contributions, creating version 3.  For
version 4, David Smith provided significant enhancements to allow for
powerful process interaction.

John Sall wrote GNU Emacs macros for \SAS\ source code in 1990.  Tom
Cook added functions to submit jobs, review listing and log files, and
produce basic views of a dataset, thus creating a SAS-mode which was
distributed in 1994.  A.J. Rossini extended this SAS-mode to work with
XEmacs.

Returning to the S languages, we note that in 1994, Rossini extended
S-mode to support XEmacs.  In 1995, together with extensions written
by Martin M{\"a}chler, this became version 4.7 and supported S, \Splus,
and R.  Kurt Hornik contributed a mode for editing R documentation
files shortly after.

During 1997, Rossini merged S-mode and SAS-mode into a single Emacs
package for statistical programming; the product of this marriage was
called ESS version 5.  Richard M. Heiberger designed the inferior mode
for interactive \SAS\ and SAS-mode was further integrated into ESS.
Thomas Lumley's Stata mode, written in 1996, was also folded into ESS.
More changes were made to support additional statistical languages,
particularly \XLispStat.

ESS initially worked only with Unix statistics packages that used
standard-input and standard-output for both the command-line interface
and batch processing.  ESS could not communicate with statistical
packages that did not use this protocol.  This changed in 1998 when
Brian Ripley demonstrated use of the Windows Dynamic Data Exchange
(DDE) protocol with ESS.  Heiberger then used DDE to provide
interactive interfaces for Windows versions of \Splus.  In 1999,
Rodney A. Sparapani and Heiberger implemented \SAS\ batch for ESS,
which relies on files rather than standard-input/standard-output, on
Unix, Windows and Mac.  In 2001, Sparapani added BUGS batch file
processing to ESS for Unix and Windows.  In 2002, Aki Vehtari
contributed BUGS interactive processing to ESS for Unix and Windows.

This history is summarized in Table~\ref{tab:timeline}.

\begin{table}[htbp]
  \centering
  {\scriptsize
  \begin{tabular}{c ll c ll}
\hline
    Year  \\
\hline
         & \multicolumn{2}{c}{S-mode}               && \multicolumn{2}{c}{SAS-mode} \\
\cline{2-3} \cline{5-6}
    1989 & v.1   & (GNU Emacs, Unix, S/S+)          &&  \\
    1990 &       &                                  &&     & (GNU Emacs, Unix, SAS editing) \\
    1991 & v.2   & (GNU Emacs, Unix, S/S+)          && \\
    1993 & v.3   & (GNU Emacs, Unix, S/S+)          && \\
    1994 & v.4   & (GNU Emacs/XEmacs, Unix, S/S+)   && v.1 & (GNU Emacs, Unix, SAS batch) \\
    1995 & v.4.7 & (GNU Emacs/XEmacs, Unix, S/S+/R) && v.2 & (GNU Emacs/XEmacs, Unix, SAS batch) \\
         &       &                                  &&     & \\ \cline{2-6}
%\\ [-3.5ex] \cline{2-6}
         & \multicolumn{5}{c}{Emacs Speaks Statistics (ESS)} \\
    \cline{2-6}
         &\multicolumn{2}{c}{Emacs, Operating Systems}     &&\multicolumn{2}{c}{Additional Functionality}\\
\cline{2-3} \cline{5-6}
    1997 & v.5.0    & (GNU Emacs/XEmacs, Unix)             &&& Stata, XLispStat, SAS interactive \\
    1998 & v.5.1.1  & (GNU Emacs/XEmacs, Unix/Windows)     &&& S+elsewhere; Windows: S+/R\\
    1999 & v.5.1.10 & (GNU Emacs/XEmacs, Unix/Windows/Mac) &&& SAS batch; Omegahat \\
    2001 & v.5.1.19 & (GNU Emacs/XEmacs, Unix/Windows/Mac) &&& Unix/DOS: BUGS batch; Mac: R \\
    2002 & v.5.2.0  & (GNU Emacs/XEmacs, Unix/Windows/Mac) &&& Unix/DOS: BUGS interactive \\
    \hline
  \end{tabular}
  }
  \caption{History and Ancestors of ESS}
  \label{tab:timeline}
\end{table}

\section{Discussion}
\label{sec:concl}

ESS provides an enhanced, powerful interface for efficient interactive
data analysis and statistical programming.  It allows the user
complete control over the communications among the files in which the
analysis is specified, the statistical process doing the computation,
and the output.  Because all activities are contained within the same
user environment and hence accessed with the same editing and
searching concepts and the same key sequences, user efficiency is
increased.  ESS is completely customizable to satisfy individual
desires for interface styles and code formats, and can be easily
extended to support other statistical languages and data analysis
packages.

\bibliographystyle{plainnat}
%\pdfbookmark[1]{References}{section.7}
\addcontentsline {toc}{section}{\numberline {}References}
\bibliography{essJCGSv3}

\clearpage

\appendix
\section{Appendix: ESS Resources on the Internet}
\addcontentsline {toc}{section}{\numberline {}ESS Resources on the Internet}
\label{sec:access}

\paragraph{Latest Version.}

ESS is constantly in flux.  New versions of statistical packages,
Emacs and operating systems require new releases of ESS to support
them.  The latest stable version of ESS can be found on the web at
\url{http://software.biostat.washington.edu/statsoft/ess/}.  To get
help with problems, send e-mail to \url{ess-help@stat.math.ethz.ch}.
The latest development, hence unstable, version can be obtained by
anonymous CVS.  First type:

\stexttt{cvs -d
  :pserver:anoncvs@software.biostat.washington.edu:/var/anoncvs login}

You will be prompted for a password which is ``\stexttt{anoncvs}''.
Then type:

\stexttt{cvs -d
  :pserver:anoncvs@software.biostat.washington.edu:/var/anoncvs co
  ess}

\paragraph{Additional documentation.}

An expanded version of the present paper is in \citet{RMHHS:2001}.  A
general introduction and usage instructions can be found in
\citet{heiberger:dsc:2001}; in addition, one which is more focused on
\SAS\ can be found in \citet{heiberger:philasugi:2001}.

The documentation that comes with ESS provides details of its
implementation as well as examples of its use.  Start with the file
\stexttt{doc/ESS{\US}intro.tex} for an overview and elementary
introduction.  Complete documentation is in
\stexttt{doc/html/ess.html} or by the \stexttt{info} system with\\
\stexttt{C-h i C-s ESS RET RET}.

\end{document}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: t
%%% End: