File: csvread.lsp

package info (click to toggle)
nyquist 3.20%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 58,008 kB
  • sloc: ansic: 74,743; lisp: 17,929; java: 10,723; cpp: 6,690; sh: 171; xml: 58; makefile: 40; python: 15
file content (106 lines) | stat: -rw-r--r-- 4,007 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
;; csvread.lsp -- csv file reader
;;
;; Roger B. Dannenberg
;; June 2022

;; CSV file reader
;; Rows are terminated by newlines, but columns are SEPARATED by commas.
;; Therefore, every row has at least one column, which may be empty.
;; Empty columns are represented by NIL.
;; Non-empty columns are converted to FIXNUM, FLONUM, or STRING, etc.
;; Fields separated by commas are read with XLisp's READ function, but
;; when a symbol is read, the original field is trimmed of leading or
;; trailing whitespace and returned. This preserves lower-case letters.
;;
;; Limitation: if a field contains a number or quoted string followed
;; by space followed by non-space, it will be parsed as a number or
;; string, and the remainder will be ignored.
;;
;; Feature: if a field contains an array, e.g. #(1 2 3), it will be
;; parsed as an array, provided the representation has no comma or
;; double-quote character. #("," "," ",") will probably raise an error.
;;
;; If there are characters beyond the last newline, a final
;; newline is assumed and the characters are interpreted as the last row.

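;; Open a CSV file for reading. The parameter INF is either a path
;; (a string) or an already-open file.
;;
;; By default, csv-read returns just the opened file, which you should
;; pass to csv-read-row() each time you want the next row.
;;
;; If ALL is true, csv-read instead reads every row and returns a list
;; of all rows.
;;
;; The file is closed by csv-read-row() when it reaches end of file.
;;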
(defun csv-read (inf &optional all &aux filename)
  ;; keep the caller's argument so the error message can report it
  (setf filename inf)
  ;; a string argument is treated as a path and opened for input
  (if (stringp inf)
      (setf inf (open inf)))
  ;; at this point inf must be a file, whether passed in or just opened
  (unless (filep inf)
    (error (format nil "csv-read: could not get a file from ~A"
                   filename)))
  (cond ((not all)
         ;; caller will pull rows one at a time with csv-read-row
         inf)
        (t
         ;; collect rows (newest first) until csv-read-row returns nil
         ;; at end of file, then restore file order
         (do* ((rows nil (cons row rows))
               (row (csv-read-row inf) (csv-read-row inf)))
              ((null row) (reverse rows))))))


;; read a single row (line) from CSV file
;;   inf is opened file returned by csv-read()
;; returns one row or nil at end of file
;;
(defun csv-read-row (inf)
  (let ((text (read-line inf)))
    (cond ((null text)
           ;; end of file: close the file and report no more rows
           (close inf)
           nil)
          (t
           (let ((field-start 0) (fields nil) piece)
             ;; visit every comma in the line from left to right
             (do ((comma (string-search "," text :start 0)
                         (string-search "," text :start (1+ comma))))
                 ((null comma))
               (setf piece (string-trim " " (subseq text field-start comma)))
               ;; a comma inside a still-open quoted string is not a
               ;; separator; leave field-start alone and try the next comma
               (unless (unclosed-string piece)
                 (setf fields (add-token-to-row piece fields))
                 (setf field-start (1+ comma))))
             ;; whatever follows the last separator is the final column
             (reverse (add-token-to-row
                       (subseq text field-start (length text))
                       fields)))))))


;; see if we got a partial string: begins with double-quote ("), no match yet
(defun unclosed-string (str)
  ;; Returns t when str starts with a double-quote and no unescaped
  ;; closing double-quote follows it; returns nil otherwise.
  (let ((n (length str)) (skip nil) c)
    (cond ((zerop n) nil)                        ;; empty: not a string at all
          ((not (eq (char str 0) #\")) nil)      ;; does not open with a quote
          (t
           ;; positions 1..n-1 are inside the opening quote
           (do ((i 1 (1+ i)))
               ((= i n) t)                       ;; ran out of text: still open
             (setf c (char str i))
             (cond (skip (setf skip nil))        ;; char after backslash is literal
                   ((eq c #\\) (setf skip t))
                   ((eq c #\") (return nil)))))))) ;; found the closing quote
         

;; parse one field and push the result onto the front of row
;;   token is the text of the field, row is the list of columns so far
;; returns the new list, with the parsed column first
;;
(defun add-token-to-row (token row)
  (let* ((parsed (read (make-string-input-stream token)))
         ;; when the reader produced a symbol, the field was unquoted
         ;; text: keep the original characters with surrounding spaces
         ;; removed, e.g. "..., xyz ,..." -> "xyz"
         (value (if (eq 'SYMBOL (type-of parsed))
                    (string-trim " " token)
                    parsed)))
    (cons value row)))