File: parsers.lisp

package info (click to toggle)
cl-pg 1:20061216-4
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 276 kB
  • ctags: 233
  • sloc: lisp: 3,125; makefile: 43
file content (315 lines) | stat: -rw-r--r-- 12,034 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
;;; parsers.lisp -- type coercion support
;;;
;;; Author: Eric Marsden <eric.marsden@free.fr>
;;
;;
;; When returning data from a SELECT statement, PostgreSQL starts by
;; sending some metadata describing the attributes. This information
;; is read by `PG:READ-ATTRIBUTES', and consists of each attribute's
;; name (as a string), its size (in bytes), and its type (as an oid
;; which points to a row in the PostgreSQL system table pg_type). Each
;; row in pg_type includes the type's name (as a string).
;;
;; We are able to parse a certain number of the PostgreSQL types (for
;; example, numeric data is converted to a numeric Common Lisp type,
;; dates are converted to the CL date representation, booleans to
;; lisp booleans). However, there isn't a fixed mapping from a
;; type to its OID which is guaranteed to be stable across database
;; installations, so we need to build a table mapping OIDs to parser
;; functions.
;;
;; This is done by the procedure `PG:INITIALIZE-PARSERS', which is run
;; the first time a connection is initiated with the database from
;; this invocation of CL, and which issues a SELECT statement to
;; extract the required information from pg_type. This initialization
;; imposes a slight overhead on the first request, which you can avoid
;; by setting `*PG-DISABLE-TYPE-COERCION*' to non-nil if it bothers you.
;; ====================================================================


;;; TODO ============================================================
;;
;; * add a mechanism for parsing user-defined types. The user should
;;   be able to define a parse function and a type-name; we query
;;   pg_type to get the type's OID and add the information to
;;   pg:*parsers*.
;;


(declaim (optimize (speed 3) (safety 1)))


(in-package :postgresql)


(defvar *pg-disable-type-coercion* nil
  "Non-nil disables the type coercion mechanism.
The default is nil, which means that data recovered from the
database is coerced to the corresponding Common Lisp type before
being returned; for example numeric data is transformed to CL
numbers, and booleans to booleans.

The coercion mechanism requires an initialization query to the
database, in order to build a table mapping type names to OIDs. This
option is provided mainly in case you wish to avoid the overhead of
this initial query. The overhead is only incurred once per session
(not per connection to the backend).")

;; alist of (oid . parser) pairs. This is built dynamically at
;; initialization of the connection with the database (once generated,
;; the information is shared between connections).
(defvar *parsers* '())


(defvar *type-to-oid*
  (make-hash-table :test #'eq)
  "Is a hashtable for turning a typename into a OID.
Needed to define the type of objects in pg-prepare")

(defvar *type-parsers*
  `(("bool"      . ,'bool-parser)
    ("bytea"     . ,'identity)
    ("char"      . ,'text-parser)
    ("char2"     . ,'text-parser)
    ("char4"     . ,'text-parser)
    ("char8"     . ,'text-parser)
    ("char16"    . ,'text-parser)
    ("text"      . ,'text-parser)
    ("varchar"   . ,'text-parser)
    ("numeric"   . ,'numeric-parser)
    ("int2"      . ,'integer-parser)
    ("int4"      . ,'integer-parser)
    ("int8"      . ,'integer-parser)
    ;; int2vector
    ("oid"       . ,'integer-parser)
    ;; oidvector
    ;; bit
    ;; varbit
    ;; record
    ;; cstring
    ;; any
    ("row"       . ,'row-parser)
    ("float4"    . ,'float-parser)
    ("float8"    . ,'float-parser)
    ("money"     . ,'text-parser)       ; "$12.34"
    ("abstime"   . ,'timestamp-parser)
    ("date"      . ,'date-parser)
    ("timestamp" . ,'timestamp-parser)  ; or 'precise-timestamp-parser if you want milliseconds
    ("timestamptz" . ,'timestamp-parser)
    ("datetime"  . ,'timestamp-parser)
    ("time"      . ,'text-parser)     ; preparsed "15:32:45"
    ("timetz"    . ,'text-parser)
    ("reltime"   . ,'text-parser)     ; don't know how to parse these
    ("timespan"  . ,'interval-parser)
    ("interval"  . ,'interval-parser)
    ("tinterval" . ,'interval-parser)))


;; see `man pgbuiltin' for details on PostgreSQL builtin types
(defun integer-parser (str) (parse-integer str))

;; from Risto Sakari Laakso <rlaakso@cc.hut.fi>
;; 
;; http://www.postgresql.org/docs/7.4/static/datatype.html#DATATYPE-NUMERIC-DECIMAL
;; 
;; NUMERIC(precision, scale)
;; 
;; The scale of a numeric is the count of decimal digits in the
;; fractional part, to the right of the decimal point. The precision of a
;; numeric is the total count of significant digits in the whole number, that
;; is, the number of digits to both sides of the decimal point.
(defun numeric-parser (str)         
  (let ((dot-pos (position #\. str))
        integer-part
        (decimal-part 0))
    ;; parse up to #\., or whole string if #\. not present 
    (setq integer-part (parse-integer (subseq str 0 dot-pos)))
    ;; if #\. present ..
    (when dot-pos
      (let* ((decimal-str (subseq str (1+ dot-pos)))
             (dec-str-len (length decimal-str)))
        
        ;; if has at least one digit after #\.
        (when (> dec-str-len 0)
          ;; parse integer after #\. and divide by 10^(digits), i.e. ".023" => 23/1000     
          (setq decimal-part (/ (parse-integer decimal-str) (expt 10 dec-str-len))))))
    (if (eq #\- (elt str 0))
        (- integer-part decimal-part)
        (+ integer-part decimal-part))))

;; FIXME switch to a specialized float parser that conses less
(defun float-parser (str)
  (declare (type simple-string str))
  (let ((*read-eval* nil))
    (read-from-string str)))

;; here we are assuming that the value of *PG-CLIENT-ENCODING* is
;; compatible with the encoding that the CL implementation uses for
;; strings. The backend should convert all values belonging to one of
;; the text data types from the table's internal representation to
;; that requested by the client, so here we don't need to do any
;; conversion.
(defun text-parser (str) str)

(defun bool-parser (str)
  (declare (type simple-string str))
  (cond ((string= "t" str) t)
        ((string= "f" str) nil)
        (t (error 'protocol-error
                  :reason "Badly formed boolean from backend: ~s" str))))

(defun parse-timestamp (str)
  (declare (type simple-string str))
  (let* ((year (parse-integer str :start 0 :end 4))
         (month (parse-integer str :start 5 :end 7))
         (day (parse-integer str :start 8 :end 10))
         (hours (parse-integer str :start 11 :end 13))
         (minutes (parse-integer str :start 14 :end 16))
         (seconds (parse-integer str :start 17 :end 19))
         (length (length str))
         (start-tz (if (find (char str (- length 3)) "+-")
                       (- length 3)))
         (tz (when start-tz
               (parse-integer str :start start-tz)))
         (milliseconds (if (and (< 19 length) (eql (char str 19) #\.))
                           (parse-integer str :start 20 :end start-tz)
                           0)))
    (values year month day hours minutes seconds milliseconds tz)))

;; format for abstime/timestamp etc with ISO output syntax is
;;
;;    "1999-01-02 05:11:23.0345645+01"
;;
;; which we convert to a CL universal time. Thanks to James Anderson
;; for a fix for timestamp format in PostgreSQL 7.3 (with or without
;; tz, with or without milliseconds).
(defun timestamp-parser (str)
  ;; Test for the special values 'infinity' and '-infinity'
  (cond ((digit-char-p (schar str 0))
         (multiple-value-bind (year month day hours minutes seconds)
             (parse-timestamp str)
           (encode-universal-time seconds minutes hours day month year)))
	((equal str "infinity") :infinity)
	((equal str "-infinity") :-infinity)
	(t (error "Unknown special timestamp value ~A" str))))

(defun precise-timestamp-parser (str)
  (multiple-value-bind (year month day hours minutes seconds milliseconds)
      (parse-timestamp str)
    (+ (encode-universal-time seconds minutes hours day month year)
       (/ milliseconds 1000.0))))

;; An interval is what you get when you subtract two timestamps. We
;; convert to a number of seconds.
(defun interval-parser (str)
  (let* ((hours (parse-integer str :start 0 :end 2))
         (minutes (parse-integer str :start 3 :end 5))
         (seconds (parse-integer str :start 6 :end 8))
         (milliseconds (parse-integer str :start 9)))
    (+ (/ milliseconds (expt 10.0 (- (length str) 9)))
       seconds
       (* 60 minutes)
       (* 60 60 hours))))


;; format for abstime/timestamp etc with ISO output syntax is
;;;    "1999-01-02 00:00:00+01"
;; which we convert to a CL universal time
(defun isodate-parser (str)
  (let ((year    (parse-integer str :start 0 :end 4))
        (month   (parse-integer str :start 5 :end 7))
        (day     (parse-integer str :start 8 :end 10))
        (hours   (parse-integer str :start 11 :end 13))
        (minutes (parse-integer str :start 14 :end 16))
        (seconds (parse-integer str :start 17 :end 19))
        (tz      (parse-integer str :start 19 :end 22)))
    (encode-universal-time seconds minutes hours day month year tz)))

;; format for date with ISO output syntax is
;;;    "1999-01-02"
;; which we convert to a CL universal time
(defun date-parser (str)
  (let ((year    (parse-integer str :start 0 :end 4))
        (month   (parse-integer str :start 5 :end 7))
        (day     (parse-integer str :start 8 :end 10)))
    (encode-universal-time 0 0 0 day month year)))


;; http://www.postgresql.org/docs/8.1/interactive/sql-expressions.html#SQL-SYNTAX-ROW-CONSTRUCTORS
;;
;; these are in the format "(foo,bar,baz)"
(defun row-parser (str)
  (assert (char= #\( (char str 0)))
  (loop :with start = 1
        :with last = (- (length str) 1)
        :for end = (or (position #\, str :start start) last)
        :collect (subseq str start end)
        :do (setq start (1+ end))
        :until (>= end last)))

(defun initialize-parsers (connection)
  (let* ((pgtypes (pg-exec connection "SELECT typname,oid FROM pg_type"))
         (tuples (pg-result pgtypes :tuples)))
    (setq *parsers* '())
    (map nil
     (lambda (tuple)
       (let* ((typname (first tuple))
              (oid (parse-integer (second tuple)))
              (type (assoc typname *type-parsers* :test #'string=)))
         (cond
           ((consp type)
            (setf (gethash (intern typname :keyword) *type-to-oid*)
                  oid)
            (push (cons oid (cdr type)) *parsers*))
           (t
            #+debug
            (warn "Unknown PostgreSQL type found: '~A' oid: '~A'"
                   typname
                   oid)))))
     tuples)))

;; FIXME should perhaps resignal parse errors as a condition derived
;; from POSTGRESQL-ERROR
(defun parse (str oid)
  (declare (type simple-string str))
  (let ((parser (assoc oid *parsers* :test #'eql)))
    (if (consp parser)
        (funcall (cdr parser) str)
        str)))

(defun lookup-type (type)
  "Given the name of a type, returns the oid of the type or NIL if
not found"
  (let ((type (etypecase type
                (symbol
                 type)
                (string
                 (intern type :keyword)))))
    (gethash type *type-to-oid*)))



;; PQescapeBytea	- converts from binary string to the
;; minimal encoding necessary to include the string in an SQL
;; INSERT statement with a bytea type column as the target.
;; 
;; The following transformations are applied
;; '\0' == ASCII  0 == \000
;; '\'' == ASCII 39 == ''
;; '\\' == ASCII 92 == \\
;; anything < 0x20, or > 0x7e ---> \ooo
;; 								(where ooo is an octal expression)
;; If not std_strings, all backslashes sent to the output are doubled.
;;
;; http://www.postgresql.org/docs/8.1/static/datatype-binary.html
(defun bytea->string (data)
  (declare (type (vector (unsigned-byte 8) *) data))
  (with-output-to-string (out)
    (loop :for octet :across data :do
          (cond ((<= 32 octet 126)
                 (write-char (code-char octet) out))
                (t
                 (format out "\\~3,'0O" octet))))))


;; EOF