File: DBI-proposal.html

package info (click to toggle)
dbi 1.3.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,740 kB
  • sloc: makefile: 2
file content (1166 lines) | stat: -rw-r--r-- 58,470 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
<!DOCTYPE html>

<html>

<head>

<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />

<meta name="viewport" content="width=device-width, initial-scale=1" />

<meta name="author" content="David James" />

<meta name="date" content="2000-03-16" />

<title>A Common Interface to Relational Databases from R and S – A Proposal</title>

<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
</script>

<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>



<style type="text/css">
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
html { -webkit-text-size-adjust: 100%; }
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } 
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } 
code span.at { color: #7d9029; } 
code span.bn { color: #40a070; } 
code span.bu { color: #008000; } 
code span.cf { color: #007020; font-weight: bold; } 
code span.ch { color: #4070a0; } 
code span.cn { color: #880000; } 
code span.co { color: #60a0b0; font-style: italic; } 
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } 
code span.do { color: #ba2121; font-style: italic; } 
code span.dt { color: #902000; } 
code span.dv { color: #40a070; } 
code span.er { color: #ff0000; font-weight: bold; } 
code span.ex { } 
code span.fl { color: #40a070; } 
code span.fu { color: #06287e; } 
code span.im { color: #008000; font-weight: bold; } 
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } 
code span.kw { color: #007020; font-weight: bold; } 
code span.op { color: #666666; } 
code span.ot { color: #007020; } 
code span.pp { color: #bc7a00; } 
code span.sc { color: #4070a0; } 
code span.ss { color: #bb6688; } 
code span.st { color: #4070a0; } 
code span.va { color: #19177c; } 
code span.vs { color: #4070a0; } 
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } 
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // replace div.sourceCode by a pre.sourceCode rule
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>



<style type="text/css">

div.csl-bib-body { }
div.csl-entry {
clear: both;
}
.hanging div.csl-entry {
margin-left:2em;
text-indent:-2em;
}
div.csl-left-margin {
min-width:2em;
float:left;
}
div.csl-right-inline {
margin-left:2em;
padding-left:1em;
}
div.csl-indent {
margin-left: 2em;
}
</style>

<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
hr {
border-style: solid;
border: none;
border-top: 1px solid #777;
margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap; 
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
img {
background-color: #FFFFFF;
padding: 2px;
border: 1px solid #DDDDDD;
border-radius: 3px;
border: 1px solid #CCCCCC;
margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }

code > span.kw { color: #555; font-weight: bold; } 
code > span.dt { color: #902000; } 
code > span.dv { color: #40a070; } 
code > span.bn { color: #d14; } 
code > span.fl { color: #d14; } 
code > span.ch { color: #d14; } 
code > span.st { color: #d14; } 
code > span.co { color: #888888; font-style: italic; } 
code > span.ot { color: #007020; } 
code > span.al { color: #ff0000; font-weight: bold; } 
code > span.fu { color: #900; font-weight: bold; } 
code > span.er { color: #a61717; background-color: #e3d2d2; } 
</style>




</head>

<body>




<h1 class="title toc-ignore">A Common Interface to Relational Databases
from R and S – A Proposal</h1>
<h4 class="author">David James</h4>
<h4 class="date">March 16, 2000</h4>



<p>For too long S and similar data analysis environments have lacked
good interfaces to relational database systems (RDBMS). For the last
twenty years or so these RDBMS have evolved into highly optimized
client-server systems for data storage and manipulation, and currently
they serve as repositories for most of the business, industrial, and
research “raw” data that analysts work with. Other analysis packages,
such as SAS, have traditionally provided good data connectivity, but S
and GNU R have relied on intermediate text files as means of importing
data (but see <span class="citation"><em><span>R</span> Data
Import/Export</em> (2001)</span> and <span class="citation"><em>Using
Relational Database Systems with <span>R</span></em> (2000)</span>.)
Although this simple approach works well for relatively modest amounts
of mostly static data, it does not scale up to larger amounts of data
distributed over machines and locations, nor does it scale up to data
that is highly dynamic – situations that are becoming increasingly
common.</p>
<p>We want to propose a common interface between R/S and RDBMS that
would allow users to access data stored on database servers in a uniform
and predictable manner irrespective of the database engine. The
interface defines a small set of classes and methods similar in spirit
to Python’s DB-API, Java’s JDBC, Microsoft’s ODBC, Perl’s DBI, etc., but
it conforms to the “whole-object” philosophy so natural in S and R.</p>
<div id="sec:distr" class="section level1">
<h1>Computing with Distributed Data</h1>
<p>As data analysts, we are increasingly faced with the challenge of
larger data sources distributed over machines and locations; most of
these data sources reside in relational database management systems
(RDBMS). These relational databases represent a mature client-server
distributed technology that we as analysts could be exploiting more that
we’ve done in the past. The relational technology provides a
well-defined standard, the ANSI SQL-92 <span class="citation"><em><span class="nocase">X/Open CAE Specification: SQL and RDA</span></em>
(1994)</span>, both for defining and manipulating data in a highly
optimized fashion from virtually any application.</p>
<p>In contrast, S and Splus have provided somewhat limited tools for
coping with the challenges of larger and distributed data sets (Splus
does provide an <code>import</code> function to import from databases,
but it is quite limited in terms of SQL facilities). The R community has
been more resourceful and has developed a number of good libraries for
connecting to mSQL, MySQL, PostgreSQL, and ODBC; each library, however,
has defined its own interface to each database engine a bit differently.
We think it would be to everybody’s advantage to coordinate the
definition of a common interface, an effort not unlike those taken in
the Python and Perl communities.</p>
<p>The goal of a common, seamless access to distributed data is a modest
one in our evolution towards a fully distributed computing environment.
We recognize the greater goal of distributed computing as the means to
fully integrate diverse systems – not just databases – into a truly
flexible analysis environment. Good connectivity to databases, however,
is of immediate necessity both in practical terms and as a means to help
us transition from monolithic, self-contained systems to those in which
computations, not only the data, can be carried out in parallel over a
wide number of computers and/or systems <span class="citation">Temple
Lang (2000)</span>. Issues of reliability, security, location
transparency, persistence, etc., will be new to most of us and working
with distributed data may provide a more gradual change to ease in the
ultimate goal of full distributed computing.</p>
</div>
<div id="sec:rs-dbi" class="section level1">
<h1>A Common Interface</h1>
<p>We believe that a common interface to databases can help users easily
access data stored in RDBMS. A common interface would describe, in a
uniform way, how to connect to RDBMS, extract meta-data (such as list of
available databases, tables, etc.) as well as a uniform way to execute
SQL statements and import their output into R and S. The current
emphasis is on querying databases and not so much in a full low-level
interface for database development as in JDBC or ODBC, but unlike these,
we want to approach the interface from the “whole-object” perspective
<span class="citation">Chambers (1998)</span> so natural to R/S and
Python – for instance, by fetching all fields and records simultaneously
into a single object.</p>
<p>The basic idea is to split the interface into a front-end consisting
of a few classes and generic functions that users invoke and a back-end
set of database-specific classes and methods that implement the actual
communication. (This is a very well-known pattern in software
engineering, and another good verbatim is the device-independent
graphics in R/S where graphics functions produce similar output on a
variety of different devices, such X displays, Postscript, etc.)</p>
<p>The following verbatim shows the front-end:</p>
<pre><code>&gt; mgr &lt;- dbManager(&quot;Oracle&quot;)
&gt; con &lt;- dbConnect(mgr, user = &quot;user&quot;, passwd = &quot;passwd&quot;)
&gt; rs &lt;- dbExecStatement(con,
          &quot;select fld1, fld2, fld3 from MY_TABLE&quot;)
&gt; tbls &lt;- fetch(rs, n = 100)
&gt; hasCompleted(rs)
[1] T
&gt; close(rs)
&gt; rs &lt;- dbExecStatement(con,
          &quot;select id_name, q25, q50 from liv2&quot;)
&gt; res &lt;- fetch(rs)
&gt; getRowCount(rs)
[1] 73
&gt; close(con)</code></pre>
<p>Such scripts should work with other RDBMS (say, MySQL) by replacing
the first line with</p>
<pre><code>&gt; mgr &lt;- dbManager(&quot;MySQL&quot;)</code></pre>
<div id="sec:rs-dbi-classes" class="section level2">
<h2>Interface Classes</h2>
<p>The following are the main RS-DBI classes. They need to be extended
by individual database back-ends (MySQL, Oracle, etc.)</p>
<dl>
<dt><code>dbManager</code></dt>
<dd>
<p>Virtual class<a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a> extended by actual database managers, e.g.,
Oracle, MySQL, Informix.</p>
</dd>
<dt><code>dbConnection</code></dt>
<dd>
<p>Virtual class that captures a connection to a database instance<a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a>.</p>
</dd>
<dt><code>dbResult</code></dt>
<dd>
<p>Virtual class that describes the result of an SQL statement.</p>
</dd>
<dt><code>dbResultSet</code></dt>
<dd>
<p>Virtual class, extends <code>dbResult</code> to fully describe the
output of those statements that produce output records, i.e.,
<code>SELECT</code> (or <code>SELECT</code>-like) SQL statement.</p>
</dd>
</dl>
<p>All these classes should implement the methods <code>show</code>,
<code>describe</code>, and <code>getInfo</code>:</p>
<dl>
<dt><code>show</code></dt>
<dd>
<p>(<code>print</code> in R) prints a one-line identification of the
object.</p>
</dd>
<dt><code>describe</code></dt>
<dd>
<p>prints a short summary of the meta-data of the specified object (like
<code>summary</code> in R/S).</p>
</dd>
<dt><code>getInfo</code></dt>
<dd>
<p>takes an object of one of the above classes and a string specifying a
meta-data item, and it returns the corresponding information
(<code>NULL</code> if unavailable).</p>
<pre><code>&gt; mgr &lt;- dbManager(&quot;MySQL&quot;)
&gt; getInfo(mgr, &quot;version&quot;)
&gt; con &lt;- dbConnect(mgr, ...)
&gt; getInfo(con, &quot;type&quot;)</code></pre>
</dd>
</dl>
<p>The reason we implement the meta-data through <code>getInfo</code> in
this way is to simplify the writing of database back-ends. We don’t want
to overwhelm the developers of drivers (ourselves, most likely) with
hundreds of methods as in the case of JDBC.</p>
<p>In addition, the following methods should also be implemented:</p>
<dl>
<dt><code>getDatabases</code></dt>
<dd>
<p>lists all available databases known to the
<code>dbManager</code>.</p>
</dd>
<dt><code>getTables</code></dt>
<dd>
<p>lists tables in a database.</p>
</dd>
<dt><code>getTableFields</code></dt>
<dd>
<p>lists the fields in a table in a database.</p>
</dd>
<dt><code>getTableIndices</code></dt>
<dd>
<p>lists the indices defined for a table in a database.</p>
</dd>
</dl>
<p>These methods may be implemented using the appropriate
<code>getInfo</code> method above.</p>
<p>In the next few sections we describe in detail each of these classes
and their methods.</p>
<div id="sec:dbManager" class="section level3">
<h3>Class <code>dbManager</code></h3>
<p>This class identifies the relational database management system. It
needs to be extended by individual back-ends (Oracle, PostgreSQL, etc.)
The <code>dbManager</code> class defines the following methods:</p>
<dl>
<dt><code>load</code></dt>
<dd>
<p>initializes the driver code. We suggest having the generator,
<code>dbManager(driver)</code>, automatically load the driver.</p>
</dd>
<dt><code>unload</code></dt>
<dd>
<p>releases whatever resources the driver is using.</p>
</dd>
<dt><code>getVersion</code></dt>
<dd>
<p>returns the version of the RS-DBI currently implemented, plus any
other relevant information about the implementation itself and the RDBMS
being used.</p>
</dd>
</dl>
</div>
<div id="sec:dbConnection" class="section level3">
<h3>Class <code>dbConnection</code></h3>
<p>This virtual class captures a connection to a RDBMS, and it provides
access to dynamic SQL, result sets, RDBMS session management
(transactions), etc. Note that the <code>dbManager</code> may or may not
allow multiple simultaneous connections. The methods it defines
include:</p>
<dl>
<dt><code>dbConnect</code></dt>
<dd>
<p>opens a connection to the database <code>dbname</code>. Other likely
arguments include <code>host</code>, <code>user</code>, and
<code>password</code>. It returns an object that extends
<code>dbConnection</code> in a driver-specific manner (e.g., the MySQL
implementation creates a connection of class
<code>MySQLConnection</code> that extends <code>dbConnection</code>).
Note that we could separate the steps of connecting to a RDBMS and
opening a database there (i.e., opening an <em>instance</em>). For
simplicity we do the 2 steps in this method. If the user needs to open
another instance in the same RDBMS, just open a new connection.</p>
</dd>
<dt><code>close</code></dt>
<dd>
<p>closes the connection and discards all pending work.</p>
</dd>
<dt><code>dbExecStatement</code></dt>
<dd>
<p>submits one SQL statement. It returns a <code>dbResult</code> object,
and in the case of a <code>SELECT</code> statement, the object also
inherits from <code>dbResultSet</code>. This <code>dbResultSet</code>
object is needed for fetching the output rows of <code>SELECT</code>
statements. The result of a non-<code>SELECT</code> statement (e.g.,
<code>UPDATE, DELETE, CREATE, ALTER</code>, …) is defined as the number
of rows affected (this seems to be common among RDBMS).</p>
</dd>
<dt><code>commit</code></dt>
<dd>
<p>commits pending transaction (optional).</p>
</dd>
<dt><code>rollback</code></dt>
<dd>
<p>undoes current transaction (optional).</p>
</dd>
<dt><code>callProc</code></dt>
<dd>
<p>invokes a stored procedure in the RDBMS (tentative). Stored
procedures are <em>not</em> part of the ANSI SQL-92 standard and
possibly vary substantially from one RDBMS to another. For instance,
Oracle seems to have a fairly decent implementation of stored
procedures, but MySQL currently does not support them.</p>
</dd>
<dt><code>dbExec</code></dt>
<dd>
<p>submit an SQL “script” (multiple statements). May be implemented by
looping with <code>dbExecStatement</code>.</p>
</dd>
<dt><code>dbNextResultSet</code></dt>
<dd>
<p>When running SQL scripts (multiple statements), it closes the current
result set in the <code>dbConnection</code>, executes the next statement
and returns its result set.</p>
</dd>
</dl>
</div>
<div id="sec:dbResult" class="section level3">
<h3>Class <code>dbResult</code></h3>
<p>This virtual class describes the result of an SQL statement (any
statement) and the state of the operation. Non-query statements (e.g.,
<code>CREATE</code>, <code>UPDATE</code>, <code>DELETE</code>) set the
“completed” state to 1, while <code>SELECT</code> statements to 0. Error
conditions set this slot to a negative number. The <code>dbResult</code>
class defines the following methods:</p>
<dl>
<dt><code>getStatement</code></dt>
<dd>
<p>returns the SQL statement associated with the result set.</p>
</dd>
<dt><code>getDBConnection</code></dt>
<dd>
<p>returns the <code>dbConnection</code> associated with the result
set.</p>
</dd>
<dt><code>getRowsAffected</code></dt>
<dd>
<p>returns the number of rows affected by the operation.</p>
</dd>
<dt><code>hasCompleted</code></dt>
<dd>
<p>was the operation completed? <code>SELECT</code>’s, for instance, are
not completed until their output rows are all fetched.</p>
</dd>
<dt><code>getException</code></dt>
<dd>
<p>returns the status of the last SQL statement on a given connection as
a list with two members, status code and status description.</p>
</dd>
</dl>
</div>
<div id="sec:dbResultSet" class="section level3">
<h3>Class <code>dbResultSet</code></h3>
<p>This virtual class extends <code>dbResult</code>, and it describes
additional information from the result of a <code>SELECT</code>
statement and the state of the operation. The <code>completed</code>
state is set to 0 so long as there are pending rows to fetch. The
<code>dbResultSet</code> class defines the following additional
methods:</p>
<dl>
<dt><code>getRowCount</code></dt>
<dd>
<p>returns the number of rows fetched so far.</p>
</dd>
<dt><code>getNullOk</code></dt>
<dd>
<p>returns a logical vector with as many elements as there are fields in
the result set, each element describing whether the corresponding field
accepts <code>NULL</code> values.</p>
</dd>
<dt><code>getFields</code></dt>
<dd>
<p>describes the <code>SELECT</code>ed fields. The description includes
field names, RDBMS internal data types, internal length, internal
precision and scale, null flag (i.e., column allows
<code>NULL</code>’s), and corresponding S classes (which can be
over-ridden with user-provided classes). The current MySQL and Oracle
implementations define a <code>dbResultSet</code> as a named list with
the following elements:</p>
<dl>
<dt><code>connection</code>:</dt>
<dd>
<p>the connection object associated with this result set;</p>
</dd>
<dt><code>statement</code>:</dt>
<dd>
<p>a string with the SQL statement being processed;</p>
</dd>
<dt><code>description</code>:</dt>
<dd>
<p>a field description <code>data.frame</code> with as many rows as
there are fields in the <code>SELECT</code> output, and columns
specifying the <code>name</code>, <code>type</code>,
<code>length</code>, <code>precision</code>, <code>scale</code>,
<code>Sclass</code> of the corresponding output field.</p>
</dd>
<dt><code>rowsAffected</code>:</dt>
<dd>
<p>the number of rows that were affected;</p>
</dd>
<dt><code>rowCount</code>:</dt>
<dd>
<p>the number of rows so far fetched;</p>
</dd>
<dt><code>completed</code>:</dt>
<dd>
<p>a logical value describing whether the operation has completed or
not.</p>
</dd>
<dt><code>nullOk</code>:</dt>
<dd>
<p>a logical vector specifying whether the corresponding column may take
NULL values.</p>
</dd>
</dl>
<p>The methods above are implemented as accessor functions to this list
in the obvious way.</p>
</dd>
<dt><code>setDataMappings</code></dt>
<dd>
<p>defines a conversion between internal RDBMS data types and R/S
classes. We expect the default mappings to be by far the most common
ones, but users that need more control may specify a class generator for
individual fields in the result set. (See Section [sec:mappings] for
details.)</p>
</dd>
<dt><code>close</code></dt>
<dd>
<p>closes the result set and frees resources both in R/S and the
RDBMS.</p>
</dd>
<dt><code>fetch</code></dt>
<dd>
<p>extracts the next <code>max.rec</code> records (-1 means all).</p>
</dd>
</dl>
</div>
</div>
<div id="sec:mappings" class="section level2">
<h2>Data Type Mappings</h2>
<p>The data types supported by databases are slightly different than the
data types in R and S, but the mapping between them is straightforward:
Any of the many fixed and varying length character types are mapped to
R/S <code>character</code>. Fixed-precision (non-IEEE) numbers are
mapped into either doubles (<code>numeric</code>) or long
(<code>integer</code>). Dates are mapped to character using the
appropriate <code>TO_CHAR</code> function in the RDBMS (which should
take care of any locale information). Some RDBMS support the type
<code>CURRENCY</code> or <code>MONEY</code> which should be mapped to
<code>numeric</code>. Large objects (character, binary, file, etc.) also
need to be mapped. User-defined functions may be specified to do the
actual conversion as follows:</p>
<ol style="list-style-type: decimal">
<li><p>run the query (either with <code>dbExec</code> or
<code>dbExecStatement</code>):</p>
<pre><code>&gt; rs &lt;- dbExecStatement(con, &quot;select whatever-You-need&quot;)</code></pre></li>
<li><p>extract the output field definitions</p>
<pre><code>&gt; flds &lt;- getFields(rs)</code></pre></li>
<li><p>replace the class generator in the, say 3rd field, by the user
own generator:</p>
<pre><code>&gt; flds[3, &quot;Sclass&quot;]            # default mapping
[1] &quot;character&quot;</code></pre>
<p>by</p>
<pre><code>&gt; flds[3, &quot;Sclass&quot;] &lt;- &quot;myOwnGeneratorFunction&quot;</code></pre></li>
<li><p>set the new data mapping prior to fetching</p>
<pre><code>&gt; setDataMappings(resutlSet, flds)</code></pre></li>
<li><p>fetch the rows and store in a <code>data.frame</code></p>
<pre><code>&gt; data &lt;- fetch(resultSet)</code></pre></li>
</ol>
</div>
<div id="sec:open-issues" class="section level2">
<h2>Open Issues</h2>
<p>We may need to provide some additional utilities, for instance to
convert dates, to escape characters such as quotes and slashes in query
strings, to strip excessive blanks from some character fields, etc. We
need to decide whether we provide hooks so these conversions are done at
the C level, or do all the post-processing in R or S.</p>
<p>Another issue is what kind of data object is the output of an SQL
query. Currently the MySQL and Oracle implementations return data as a
<code>data.frame</code>; data frames have the slight inconvenience that
they automatically re-label the fields according to R/S syntax, changing
the actual RDBMS labels of the variables; the issue of non-numeric data
being coerced into factors automatically “at the drop of a hat” (as
someone in s-news wrote) is also annoying.</p>
<p>The execution of SQL scripts is not fully described. The method that
executes scripts could run individual statements without returning until
it encounters a query (<code>SELECT</code>-like) statement. At that
point it could return that one result set. The application is then
responsible for fetching these rows, and then for invoking
<code>dbNextResultSet</code> on the opened <code>dbConnection</code>
object to repeat the <code>dbExec</code>/<code>fetch</code> loop until
it encounters the next <code>dbResultSet</code>. And so on. Another
(potentially very expensive) alternative would be to run all statements
sequentially and return a list of <code>data.frame</code>s, each element
of the list storing the result of each statement.</p>
<p>Binary objects and large objects present some challenges both to R
and S. It is becoming more common to store images, sounds, and other
data types as binary objects in RDBMS, some of which can be in principle
quite large. The SQL-92 ANSI standard allows up to 2 gigabytes for some
of these objects. We need to carefully plan how to deal with binary
objects – perhaps tentatively not in full generality. Large objects
could be fetched by repeatedly invoking a specified R/S function that
takes as argument chunks of a specified number of raw bytes. In the case
of S4 (and Splus5.x) the RS-DBI implementation can write into an opened
connection for which the user has defined a reader (but can we guarantee
that we won’t overflow the connection?). In the case of R it is not
clear what data type binary large objects (BLOB) should be mapped
into.</p>
</div>
<div id="sec:limitations" class="section level2">
<h2>Limitations</h2>
<p>These are some of the limitations of the current interface
definition:</p>
<ul>
<li><p>we only allow one SQL statement at a time, forcing users to split
SQL scripts into individual statements;</p></li>
<li><p>transaction management is not fully described;</p></li>
<li><p>the interface is heavily biased towards queries, as opposed to
general purpose database development. In particular we made no attempt
to define “bind variables”; this is a mechanism by which the contents of
S objects are implicitly moved to the database during SQL execution. For
instance, the following embedded SQL statement</p>
<pre><code>  /* SQL */
  SELECT * from emp_table where emp_id = :sampleEmployee</code></pre>
<p>would take the vector <code>sampleEmployee</code> and iterate over
each of its elements to get the result. Perhaps RS-DBI could at some
point in the future implement this feature.</p></li>
</ul>
</div>
</div>
<div id="other-approaches" class="section level1">
<h1>Other Approaches</h1>
<p>The high-level, front-end description of RS-DBI is the more critical
aspect of the interface. Details on how to actually implement this
interface may change over time. The approach described in this document
based on one back-end driver per RDBMS is reasonable, but not the only
approach – we simply felt that a simpler approach based on
well-understood and self-contained tools (R, S, and C API’s) would be a
better start. Nevertheless we want to briefly mention a few alternatives
that we considered and tentatively decided against, but may quite
possibly re-visit in the near future.</p>
<div id="sec:odbc" class="section level2">
<h2>Open Database Connectivity (ODBC)</h2>
<p>The ODBC protocol was developed by Microsoft to allow connectivity
among C/C++ applications and RDBMS. As you would expect, originally
implementations of the ODBC were only available under Windows
environments. There are various effort to create a Unix implementation
(see <a href="https://www.unixodbc.org/">the Unix ODBC</a> web-site and
<span class="citation">Harvey (1999)</span>). This approach looks
promising because it allows us to write only one back-end, instead of
one per RDBMS. Since most RDBMS already provide ODBC drivers, this could
greatly simplify development. Unfortunately, the Unix implementation of
ODBC was not mature enough at the time we looked at it, a situation we
expect will change in the next year or so. At that point we will need to
re-evaluate it to make sure that such an ODBC interface does not
penalize the interface in terms of performance, ease of use, portability
among the various Unix versions, etc.</p>
</div>
<div id="sec:jdbc" class="section level2">
<h2>Java Database Connectivity (JDBC)</h2>
<p>Another protocol, the Java database connectivity, is very well-done
and supported by just about every RDBMS. The issue with JDBC is that as
of today neither S nor R (which are written in C) interfaces cleanly
with Java. There are several efforts (some in a quite fairly advanced
state) to allow S and R to invoke Java methods. Once this interface is
widely available in Splus5x and R we will need to re-visit this issue
again and study the performance, usability, etc., of JDBC as a common
back-end to the RS-DBI.</p>
</div>
<div id="sec:corba" class="section level2">
<h2>CORBA and a 3-tier Architecture</h2>
<p>Yet another approach is to move the interface to RDBMS out of R and S
altogether into a separate system or server that would serve as a proxy
between R/S and databases. The communication to this middle-layer proxy
could be done through CORBA <span class="citation">Siegel (1996)</span>,
Java’s RMI, or some other similar technology. Such a design could be
very flexible, but the CORBA facilities both in R and S are not widely
available yet, and we do not know whether this will be made available to
Splus5 users from MathSoft. Also, my experience with this technology is
rather limited.</p>
<p>On the other hand, this 3-tier architecture seem to offer the most
flexibility to cope with very large distributed databases, not
necessarily relational.</p>
</div>
</div>
<div id="sec:resources" class="section level1">
<h1>Resources</h1>
<p>The latest documentation and software on the RS-DBI was available at
www.omegahat.net (link dead now:
<code>https://www.omegahat.net/contrib/RS-DBI/index.html</code>). The R
community has developed interfaces to some databases: <a href="https://cran.r-project.org/src/contrib/Archive/RmSQL/">RmSQL</a>
is an interface to the <a href="https://www.hughes.com.au/">mSQL</a>
database written by Torsten Hothorn; <a href="https://keittlab.org">RPgSQL</a> is an interface to <a href="https://www.postgreSQL.org">PostgreSQL</a> and was written by
Timothy H. Keitt; <a href="https://www.stats.ox.ac.uk/pub/bdr/">RODBC</a> is an interface to
ODBC, and it was written by <a href="mailto:mlapsley@sthelier.sghms.ac.uk">Michael Lapsley</a>. (For
more details on all these see <span class="citation"><em><span>R</span>
Data Import/Export</em> (2001)</span>.)</p>
<p>The are R and S-Plus interfaces to <a href="https://dev.mysql.com/">MySQL</a> that follow the propose RS-DBI
API described here; also, there’s an S-Plus interface SOracle <span class="citation">James (In preparation)</span> to Oracle (we expect to
have an R implementation soon.)</p>
<p>The idea of a common interface to databases has been successfully
implemented in Java’s Database Connectivity (JDBC) (<a href="https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/">www.javasoft.com</a>),
in C through the Open Database Connectivity (ODBC) (<a href="https://www.unixodbc.org/">www.unixodbc.org</a>), in Python’s
Database Application Programming Interface (<a href="https://www.python.org">www.python.org</a>), and in Perl’s
Database Interface (<a href="https://www.cpan.org">www.cpan.org</a>).</p>
</div>
<div id="acknowledgements" class="section level1">
<h1>Acknowledgements</h1>
<p>The R/S database interface came about from suggestions, comments, and
discussions with <a href="mailto:jmc@research.bell-labs.com">John M.
Chambers</a> and <a href="mailto:duncan@research.bell-labs.com">Duncan
Temple Lang</a> in the context of the Omega Project for Statistical
Computing. <a href="mailto:bates@stat.wisc.edu">Doug Bates</a> and <a href="mailto:saikat@stat.wisc.edu">Saikat DebRoy</a> ported (and greatly
improved) the first MySQL implementation to R.</p>
</div>
<div id="the-s-version-4-definitions" class="section level1">
<h1>The S Version 4 Definitions</h1>
<p>The following code is meant to serve as a detailed description of the
R/S to database interface. We decided to use S4 (instead of R or S
version 3) because its clean syntax help us to describe easily the
classes and methods that form the RS-DBI, and also to convey the
inter-class relationships.</p>
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a><span class="do">## Define all the classes and methods to be used by an</span></span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a><span class="do">## implementation of the RS-DataBase Interface.  Mostly,</span></span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a><span class="do">## these classes are virtual and each driver should extend</span></span>
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a><span class="do">## them to provide the actual implementation.</span></span>
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a></span>
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="do">## Class: dbManager</span></span>
<span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a><span class="do">## This class identifies the DataBase Management System</span></span>
<span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="do">## (Oracle, MySQL, Informix, PostgreSQL, etc.)</span></span>
<span id="cb11-9"><a href="#cb11-9" tabindex="-1"></a></span>
<span id="cb11-10"><a href="#cb11-10" tabindex="-1"></a><span class="fu">setClass</span>(<span class="st">&quot;dbManager&quot;</span>, VIRTUAL)</span>
<span id="cb11-11"><a href="#cb11-11" tabindex="-1"></a></span>
<span id="cb11-12"><a href="#cb11-12" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;load&quot;</span>,</span>
<span id="cb11-13"><a href="#cb11-13" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(dbMgr,...)</span>
<span id="cb11-14"><a href="#cb11-14" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;load&quot;</span>)</span>
<span id="cb11-15"><a href="#cb11-15" tabindex="-1"></a>   )</span>
<span id="cb11-16"><a href="#cb11-16" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;unload&quot;</span>,</span>
<span id="cb11-17"><a href="#cb11-17" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(dbMgr,...)</span>
<span id="cb11-18"><a href="#cb11-18" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;unload&quot;</span>)</span>
<span id="cb11-19"><a href="#cb11-19" tabindex="-1"></a>   )</span>
<span id="cb11-20"><a href="#cb11-20" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getVersion&quot;</span>,</span>
<span id="cb11-21"><a href="#cb11-21" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(dbMgr,...)</span>
<span id="cb11-22"><a href="#cb11-22" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getVersion&quot;</span>)</span>
<span id="cb11-23"><a href="#cb11-23" tabindex="-1"></a>   )</span>
<span id="cb11-24"><a href="#cb11-24" tabindex="-1"></a></span>
<span id="cb11-25"><a href="#cb11-25" tabindex="-1"></a><span class="do">## Class: dbConnections</span></span>
<span id="cb11-26"><a href="#cb11-26" tabindex="-1"></a><span class="do">## This class captures a connection to a database instance.</span></span>
<span id="cb11-27"><a href="#cb11-27" tabindex="-1"></a></span>
<span id="cb11-28"><a href="#cb11-28" tabindex="-1"></a><span class="fu">setClass</span>(<span class="st">&quot;dbConnection&quot;</span>, VIRTUAL)</span>
<span id="cb11-29"><a href="#cb11-29" tabindex="-1"></a></span>
<span id="cb11-30"><a href="#cb11-30" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;dbConnection&quot;</span>,</span>
<span id="cb11-31"><a href="#cb11-31" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(dbMgr, ...)</span>
<span id="cb11-32"><a href="#cb11-32" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;dbConnection&quot;</span>)</span>
<span id="cb11-33"><a href="#cb11-33" tabindex="-1"></a>   )</span>
<span id="cb11-34"><a href="#cb11-34" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;dbConnect&quot;</span>,</span>
<span id="cb11-35"><a href="#cb11-35" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(dbMgr, ...)</span>
<span id="cb11-36"><a href="#cb11-36" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;dbConnect&quot;</span>)</span>
<span id="cb11-37"><a href="#cb11-37" tabindex="-1"></a>   )</span>
<span id="cb11-38"><a href="#cb11-38" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;dbExecStatement&quot;</span>,</span>
<span id="cb11-39"><a href="#cb11-39" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, statement, ...)</span>
<span id="cb11-40"><a href="#cb11-40" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;dbExecStatement&quot;</span>)</span>
<span id="cb11-41"><a href="#cb11-41" tabindex="-1"></a>   )</span>
<span id="cb11-42"><a href="#cb11-42" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;dbExec&quot;</span>,</span>
<span id="cb11-43"><a href="#cb11-43" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, statement, ...)</span>
<span id="cb11-44"><a href="#cb11-44" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;dbExec&quot;</span>)</span>
<span id="cb11-45"><a href="#cb11-45" tabindex="-1"></a>   )</span>
<span id="cb11-46"><a href="#cb11-46" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getResultSet&quot;</span>,</span>
<span id="cb11-47"><a href="#cb11-47" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, ..)</span>
<span id="cb11-48"><a href="#cb11-48" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getResultSet&quot;</span>)</span>
<span id="cb11-49"><a href="#cb11-49" tabindex="-1"></a>   )</span>
<span id="cb11-50"><a href="#cb11-50" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;commit&quot;</span>,</span>
<span id="cb11-51"><a href="#cb11-51" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, ...)</span>
<span id="cb11-52"><a href="#cb11-52" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;commit&quot;</span>)</span>
<span id="cb11-53"><a href="#cb11-53" tabindex="-1"></a>   )</span>
<span id="cb11-54"><a href="#cb11-54" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;rollback&quot;</span>,</span>
<span id="cb11-55"><a href="#cb11-55" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, ...)</span>
<span id="cb11-56"><a href="#cb11-56" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;rollback&quot;</span>)</span>
<span id="cb11-57"><a href="#cb11-57" tabindex="-1"></a>   )</span>
<span id="cb11-58"><a href="#cb11-58" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;callProc&quot;</span>,</span>
<span id="cb11-59"><a href="#cb11-59" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, ...)</span>
<span id="cb11-60"><a href="#cb11-60" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;callProc&quot;</span>)</span>
<span id="cb11-61"><a href="#cb11-61" tabindex="-1"></a>   )</span>
<span id="cb11-62"><a href="#cb11-62" tabindex="-1"></a><span class="fu">setMethod</span>(<span class="st">&quot;close&quot;</span>,</span>
<span id="cb11-63"><a href="#cb11-63" tabindex="-1"></a>   <span class="at">signature =</span> <span class="fu">list</span>(<span class="at">con=</span><span class="st">&quot;dbConnection&quot;</span>, <span class="at">type=</span><span class="st">&quot;missing&quot;</span>),</span>
<span id="cb11-64"><a href="#cb11-64" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, type) <span class="cn">NULL</span></span>
<span id="cb11-65"><a href="#cb11-65" tabindex="-1"></a>   )</span>
<span id="cb11-66"><a href="#cb11-66" tabindex="-1"></a></span>
<span id="cb11-67"><a href="#cb11-67" tabindex="-1"></a><span class="do">## Class: dbResult</span></span>
<span id="cb11-68"><a href="#cb11-68" tabindex="-1"></a><span class="do">## This is a base class for arbitrary results from the RDBMS</span></span>
<span id="cb11-69"><a href="#cb11-69" tabindex="-1"></a><span class="do">## (INSERT, UPDATE, DELETE).  SELECTs (and SELECT-like)</span></span>
<span id="cb11-70"><a href="#cb11-70" tabindex="-1"></a><span class="do">## statements produce &quot;dbResultSet&quot; objects, which extend</span></span>
<span id="cb11-71"><a href="#cb11-71" tabindex="-1"></a><span class="do">## dbResult.</span></span>
<span id="cb11-72"><a href="#cb11-72" tabindex="-1"></a></span>
<span id="cb11-73"><a href="#cb11-73" tabindex="-1"></a><span class="fu">setClass</span>(<span class="st">&quot;dbResult&quot;</span>, VIRTUAL)</span>
<span id="cb11-74"><a href="#cb11-74" tabindex="-1"></a></span>
<span id="cb11-75"><a href="#cb11-75" tabindex="-1"></a><span class="fu">setMethod</span>(<span class="st">&quot;close&quot;</span>,</span>
<span id="cb11-76"><a href="#cb11-76" tabindex="-1"></a>   <span class="at">signature =</span> <span class="fu">list</span>(<span class="at">con=</span><span class="st">&quot;dbResult&quot;</span>, <span class="at">type=</span><span class="st">&quot;missing&quot;</span>),</span>
<span id="cb11-77"><a href="#cb11-77" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(con, type) <span class="cn">NULL</span></span>
<span id="cb11-78"><a href="#cb11-78" tabindex="-1"></a>   )</span>
<span id="cb11-79"><a href="#cb11-79" tabindex="-1"></a></span>
<span id="cb11-80"><a href="#cb11-80" tabindex="-1"></a><span class="do">## Class: dbResultSet</span></span>
<span id="cb11-81"><a href="#cb11-81" tabindex="-1"></a><span class="do">## Note that we define a resultSet as the result of a</span></span>
<span id="cb11-82"><a href="#cb11-82" tabindex="-1"></a><span class="do">## SELECT  SQL statement.</span></span>
<span id="cb11-83"><a href="#cb11-83" tabindex="-1"></a></span>
<span id="cb11-84"><a href="#cb11-84" tabindex="-1"></a><span class="fu">setClass</span>(<span class="st">&quot;dbResultSet&quot;</span>, <span class="st">&quot;dbResult&quot;</span>)</span>
<span id="cb11-85"><a href="#cb11-85" tabindex="-1"></a></span>
<span id="cb11-86"><a href="#cb11-86" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;fetch&quot;</span>,</span>
<span id="cb11-87"><a href="#cb11-87" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(resultSet,n,...)</span>
<span id="cb11-88"><a href="#cb11-88" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;fetch&quot;</span>)</span>
<span id="cb11-89"><a href="#cb11-89" tabindex="-1"></a>   )</span>
<span id="cb11-90"><a href="#cb11-90" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;hasCompleted&quot;</span>,</span>
<span id="cb11-91"><a href="#cb11-91" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-92"><a href="#cb11-92" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;hasCompleted&quot;</span>)</span>
<span id="cb11-93"><a href="#cb11-93" tabindex="-1"></a>   )</span>
<span id="cb11-94"><a href="#cb11-94" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getException&quot;</span>,</span>
<span id="cb11-95"><a href="#cb11-95" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-96"><a href="#cb11-96" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getException&quot;</span>)</span>
<span id="cb11-97"><a href="#cb11-97" tabindex="-1"></a>   )</span>
<span id="cb11-98"><a href="#cb11-98" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getDBconnection&quot;</span>,</span>
<span id="cb11-99"><a href="#cb11-99" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-100"><a href="#cb11-100" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getDBconnection&quot;</span>)</span>
<span id="cb11-101"><a href="#cb11-101" tabindex="-1"></a>   )</span>
<span id="cb11-102"><a href="#cb11-102" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;setDataMappings&quot;</span>,</span>
<span id="cb11-103"><a href="#cb11-103" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(resultSet, ...)</span>
<span id="cb11-104"><a href="#cb11-104" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;setDataMappings&quot;</span>)</span>
<span id="cb11-105"><a href="#cb11-105" tabindex="-1"></a>   )</span>
<span id="cb11-106"><a href="#cb11-106" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getFields&quot;</span>,</span>
<span id="cb11-107"><a href="#cb11-107" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, table, dbname,  ...)</span>
<span id="cb11-108"><a href="#cb11-108" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getFields&quot;</span>)</span>
<span id="cb11-109"><a href="#cb11-109" tabindex="-1"></a>   )</span>
<span id="cb11-110"><a href="#cb11-110" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getStatement&quot;</span>,</span>
<span id="cb11-111"><a href="#cb11-111" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-112"><a href="#cb11-112" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getStatement&quot;</span>)</span>
<span id="cb11-113"><a href="#cb11-113" tabindex="-1"></a>   )</span>
<span id="cb11-114"><a href="#cb11-114" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getRowsAffected&quot;</span>,</span>
<span id="cb11-115"><a href="#cb11-115" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-116"><a href="#cb11-116" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getRowsAffected&quot;</span>)</span>
<span id="cb11-117"><a href="#cb11-117" tabindex="-1"></a>   )</span>
<span id="cb11-118"><a href="#cb11-118" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getRowCount&quot;</span>,</span>
<span id="cb11-119"><a href="#cb11-119" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-120"><a href="#cb11-120" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getRowCount&quot;</span>)</span>
<span id="cb11-121"><a href="#cb11-121" tabindex="-1"></a>   )</span>
<span id="cb11-122"><a href="#cb11-122" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getNullOk&quot;</span>,</span>
<span id="cb11-123"><a href="#cb11-123" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-124"><a href="#cb11-124" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getNullOk&quot;</span>)</span>
<span id="cb11-125"><a href="#cb11-125" tabindex="-1"></a>   )</span>
<span id="cb11-126"><a href="#cb11-126" tabindex="-1"></a></span>
<span id="cb11-127"><a href="#cb11-127" tabindex="-1"></a><span class="do">## Meta-data:</span></span>
<span id="cb11-128"><a href="#cb11-128" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getInfo&quot;</span>,</span>
<span id="cb11-129"><a href="#cb11-129" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-130"><a href="#cb11-130" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getInfo&quot;</span>)</span>
<span id="cb11-131"><a href="#cb11-131" tabindex="-1"></a>   )</span>
<span id="cb11-132"><a href="#cb11-132" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;describe&quot;</span>,</span>
<span id="cb11-133"><a href="#cb11-133" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, <span class="at">verbose=</span>F, ...)</span>
<span id="cb11-134"><a href="#cb11-134" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;describe&quot;</span>)</span>
<span id="cb11-135"><a href="#cb11-135" tabindex="-1"></a>   )</span>
<span id="cb11-136"><a href="#cb11-136" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getCurrentDatabase&quot;</span>,</span>
<span id="cb11-137"><a href="#cb11-137" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-138"><a href="#cb11-138" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getCurrentDatabase&quot;</span>)</span>
<span id="cb11-139"><a href="#cb11-139" tabindex="-1"></a>   )</span>
<span id="cb11-140"><a href="#cb11-140" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getDatabases&quot;</span>,</span>
<span id="cb11-141"><a href="#cb11-141" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, ...)</span>
<span id="cb11-142"><a href="#cb11-142" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getDatabases&quot;</span>)</span>
<span id="cb11-143"><a href="#cb11-143" tabindex="-1"></a>   )</span>
<span id="cb11-144"><a href="#cb11-144" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getTables&quot;</span>,</span>
<span id="cb11-145"><a href="#cb11-145" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, dbname, ...)</span>
<span id="cb11-146"><a href="#cb11-146" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getTables&quot;</span>)</span>
<span id="cb11-147"><a href="#cb11-147" tabindex="-1"></a>   )</span>
<span id="cb11-148"><a href="#cb11-148" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getTableFields&quot;</span>,</span>
<span id="cb11-149"><a href="#cb11-149" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, table, dbname, ...)</span>
<span id="cb11-150"><a href="#cb11-150" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getTableFields&quot;</span>)</span>
<span id="cb11-151"><a href="#cb11-151" tabindex="-1"></a>   )</span>
<span id="cb11-152"><a href="#cb11-152" tabindex="-1"></a><span class="fu">setGeneric</span>(<span class="st">&quot;getTableIndices&quot;</span>,</span>
<span id="cb11-153"><a href="#cb11-153" tabindex="-1"></a>   <span class="at">def =</span> <span class="cf">function</span>(object, table, dbname, ...)</span>
<span id="cb11-154"><a href="#cb11-154" tabindex="-1"></a>   <span class="fu">standardGeneric</span>(<span class="st">&quot;getTableIndices&quot;</span>)</span>
<span id="cb11-155"><a href="#cb11-155" tabindex="-1"></a>   )</span></code></pre></div>
<div id="refs" class="references csl-bib-body hanging-indent">
<div id="ref-S4" class="csl-entry">
Chambers, J. M. 1998. <em>Programming with Data: A Guide to the s
Language</em>. Springer.
</div>
<div id="ref-s-corba.98" class="csl-entry">
Chambers, John M., Mark H. Hansen, David A. James, and Duncan Temple
Lang. 1998. <span>“Distributed Computing with Data: A CORBA-Based
Approach.”</span> <em>Computing Science and Statistics</em>.
</div>
<div id="ref-odbc.lj" class="csl-entry">
Harvey, Peter. 1999. <span>“<span>Open Database
Connectivity</span>.”</span> <em><span>Linux Journal</span></em> Nov.
(67): 68–72.
</div>
<div id="ref-RS-Oracle" class="csl-entry">
James, David A. In preparation. <em>An <span>R</span>/<span>S</span>
Interface to the <span>Oracle</span> Database</em>. Bell Labs, Lucent
Technologies.
</div>
<div id="ref-R.imp-exp" class="csl-entry">
<em><span>R</span> Data Import/Export</em>. 2001. R-Development Core
Team.
</div>
<div id="ref-corba:siegel.96" class="csl-entry">
Siegel, Jon. 1996. <em>CORBA Fundamentals and Programming</em>. Wiley.
</div>
<div id="ref-duncan2000" class="csl-entry">
Temple Lang, Duncan. 2000. <span>“<span class="nocase">The Omegahat
Environment: New Possibilities for Statistical Computing</span>.”</span>
<em>Journal of Computational and Graphical Statistics</em> to appear.
</div>
<div id="ref-R-dbms" class="csl-entry">
<em>Using Relational Database Systems with <span>R</span></em>. 2000.
R-Developemt Core Team.
</div>
<div id="ref-sql92" class="csl-entry">
<em><span class="nocase">X/Open CAE Specification: SQL and
RDA</span></em>. 1994. X/Open Company Ltd.
</div>
</div>
</div>
<div class="footnotes footnotes-end-of-document">
<hr />
<ol>
<li id="fn1"><p>A virtual class allows us to group classes that share
some common functionality, e.g., the virtual class
“<code>dbConnection</code>” groups all the connection implementations by
Informix, Ingres, DB/2, Oracle, etc. Although the details will vary from
one RDBMS to another, the defining characteristic of these objects is
what a virtual class captures. R and S version 3 do not explicitly
define virtual classes, but they can easily implement the idea through
inheritance.<a href="#fnref1" class="footnote-back">↩︎</a></p></li>
<li id="fn2"><p>The term “database” is sometimes (confusingly) used both
to denote the RDBMS, such as Oracle, MySQL, and also to denote a
particular database instance under a RDBMS, such as “opto” or “sales”
databases under the same RDBMS.<a href="#fnref2" class="footnote-back">↩︎</a></p></li>
</ol>
</div>



<!-- code folding -->


<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>

</body>
</html>