File: xml-parse.el

package info (click to toggle)
emacspeak 29.0-9
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 12,904 kB
  • sloc: xml: 55,354; lisp: 48,335; cpp: 2,321; tcl: 1,500; makefile: 936; python: 836; sh: 785; perl: 459; ansic: 241
file content (370 lines) | stat: -rw-r--r-- 12,713 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
;;; xml-parse --- code to efficiently read/write XML data with Elisp

;; Copyright (C) 2001 John Wiegley.

;; Author: John Wiegley <johnw@gnu.org>
;; Version: 1.5
;; Created: Feb 15, 2001
;; Keywords: convenience languages lisp xml parse data
;; URL: http://www.gci-net.com/~johnw/emacs.html

;; This file is NOT (yet) part of GNU Emacs.

;; This is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; This software is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:
;;
;; XML is yet another way of expressing recursive, attributed data
;; structures -- something which Lisp has had the capacity to do for
;; decades.
;;
;; The approach taken by xml-parse.el is to read XML data into Lisp
;; structures, and allow those same Lisp structures to be written out
;; as XML.  It should facilitate the manipulation and use of XML by
;; Elisp programs.

;; NOTE: This is not a validating parser, and makes no attempt to read
;; DTDs.  See psgml.el if you need that kind of power.
;;
;; Also, tags beginning with <? or <! are not parsed, but merely
;; included in the resulting data structure as separate string
;; entries.  These may be tested for using the function
;; `xml-tag-special-p'.  If present, they are treated just like normal
;; text, and will be inserted along with everything else.  If they
;; occur *before* the opening tag of an XML tree, they will not appear
;; in the parsed data, since such "pre-tags" are not the child of any
;; tag.

;; Here is the format of the Lisp data structure used:
;;
;;   (TAG CHILD...)
;;
;; Where TAG is either a string (naming the tag) or a list.  The list
;; form is used to identify attributes, and has the format:
;;
;;   (TAG-NAME (ATTR-NAME . ATTR-VALUE)...)
;;
;; After the TAG, there can be zero or more child structures, which
;; are either literal strings, or the same "TAG CHILD..." structure as
;; the parent.  See `insert-xml' for an EBNF grammar of this layout.

;; EXAMPLE: Given the following DocBook XML data:
;;
;;   <book id="compiler">
;;     <bookinfo>
;;       <bookbiblio>
;;         <title>My own book!</title>
;;         <edition>First</edition>
;;         <authorgroup>
;;           <author>
;;             <firstname>John</firstname>
;;             <surname>Wiegley</surname>
;;           </author>
;;         </authorgroup>
;;       </bookbiblio>
;;     </bookinfo>
;;     <chapter>
;;       <title>A very small chapter</title>
;;       <para>Wonder where the content is...</para>
;;     </chapter>
;;   </book>
;;
;; It would be parsed into this Lisp structure:
;;
;;   '(("book" ("id" . "compiler"))
;;     ("bookinfo"
;;      ("bookbiblio"
;;       ("title" "My own book!")
;;       ("edition" "FIrst")
;;       ("authorgroup"
;;        ("author"
;;         ("firstname" "John")
;;         ("surname" "Wiegley")))))
;;     ("chapter"
;;      ("title" "A very small chapter")
;;      ("para" "Wonder where the content is...")))
;;
;; Now it can easily be modified and interpreted using ordinary Lisp
;; code, without the ordeal of manipulating textual XML.  When you're
;; done modifying it, you can write it back out (complete with proper
;; indentation and newlines) using:
;;
;;   (insert-xml <DATA> t)
;;
;; See the documentation for `read-xml' and `insert-xml' for more
;; information.
;;
;; There are also a set of helper functions for accessing parts of a
;; parsed tag:
;;
;;   xml-tag-name       get the name of a tag
;;   xml-tag-attrlist   returns a tag's attribute alist
;;   xml-tag-attr       lookup a specific tag attribute
;;   xml-tag-children   returns a tag's child list
;;   xml-tag-child      lookup a specific child tag by name
;;
;; Also, the attribute list and child lists can be searched using
;; `assoc', since they roughly have the same format as an alist.

;;;###autoload
(defun read-xml ()
  "Parse XML data at point into a Lisp structure.
See `insert-xml' for a description of the format of this structure.
Point is left at the end of the XML structure read."
  (cdr (xml-parse-read)))

(defsubst xml-tag-with-attributes-p (tag)
  "Does the TAG have attributes or not?"
  (listp (car tag)))

(defsubst xml-tag-name (tag)
  "Return the name of an xml-parse'd XML TAG."
  (cond ((xml-tag-text-p tag)
         (car tag))
        ((xml-tag-with-attributes-p tag)
         (caar tag))
        (t (car tag))))

(defun xml-tag-text-p (tag)
  "Is the given TAG really just a text entry?"
  (stringp tag))

(defsubst xml-tag-special-p (tag)
  "Return the name of an xml-parse'd XML TAG."
  (and (xml-tag-text-p tag)
       (eq (aref tag 0) ?\<)))

(defsubst xml-tag-attrlist (tag)
  "Return the attribute list of an xml-parse'd XML TAG."
  (and (not (stringp (car tag)))
       (cdar tag)))

(defsubst xml-tag-attr (tag attr)
  "Return a specific ATTR of an xml-parse'd XML TAG."
  (cdr (assoc attr (xml-tag-attrlist tag))))

(defsubst xml-tag-children (tag)
  "Return the list of child tags of an xml-parse'd XML TAG."
  (cdr tag))

(defun xml-tag-child (tag name)
  "Return the first child matching NAME, of an xml-parse'd XML TAG."
  (catch 'found
    (let ((children (xml-tag-children tag)))
      (while children
        (if (string-equal name (xml-tag-name (car children)))
            (throw 'found (car children)))
        (setq children (cdr children))))))

;;;###autoload
(defun insert-xml (data &optional add-newlines public system depth ret-depth)
  "Insert DATA, a recursive Lisp structure, at point as XML.
DATA has the form:

  ENTRY       ::=  (TAG CHILD*)
  CHILD       ::=  STRING | ENTRY
  TAG         ::=  TAG_NAME | (TAG_NAME ATTR+)
  ATTR        ::=  (ATTR_NAME . ATTR_VALUE)
  TAG_NAME    ::=  STRING
  ATTR_NAME   ::=  STRING
  ATTR_VALUE  ::=  STRING

If ADD-NEWLINES is non-nil, newlines and indentation will be added to
make the data user-friendly.

If PUBLIC and SYSTEM are non-nil, a !DOCTYPE tag will be added at the
top of the document to identify it as an XML document.

DEPTH is normally for internal use only, and controls the depth of the
indentation."
  (when (and (not depth) public system)
    (insert "<?xml version=\"1.0\"?>\n")
    (insert "<!DOCTYPE " (if (stringp (car data))
                             (car data)
                           (caar data))
            " PUBLIC \"" public "\"\n  \"" system "\">\n"))
  (if (stringp data)
      (insert data)
    (let ((node (car data)) (add-nl t))
      (and depth (bolp)
           (insert (make-string (* depth 2) ? )))
      (if (stringp node)
          (insert "<" node)
        (setq node (caar data))
        (insert "<" node)
        (let ((attrs (cdar data)))
          (while attrs
            (insert " " (caar attrs) "=\"" (cdar attrs) "\"")
            (setq attrs (cdr attrs)))))
      (if (null (cdr data))
          (insert "/>")
        (insert ">")
        (setq data (cdr data))
        (while data
          (and add-newlines add-nl
               (not (stringp (car data)))
               (insert ?\n))
          (setq add-nl (insert-xml (car data) add-newlines
                                   nil nil (1+ (or depth 0)))
                data (cdr data)))
        (when add-nl
          (and add-newlines (insert ?\n))
          (and depth (insert (make-string (* depth 2) ? ))))
        (insert "</" node ">"))
      t)))

;;;###autoload
(defun xml-reformat-tags ()
  "If point is on the open bracket of an XML tag, reformat that tree.
Note that this only works if the opening tag starts at column 0."
  (interactive)
  (save-excursion
    (let* ((beg (point)) (tags (read-xml)))
      (delete-region beg (point))
      (insert-xml tags t))))

;;; Internal Functions

(defun xml-parse-profile ()
  (interactive)
  (let ((elp-function-list
         '(buffer-substring-no-properties
           char-after
           char-before
           forward-char
           looking-at
           match-string-no-properties
           match-beginning
           match-end
           point
           re-search-forward
           read-xml
           xml-parse-read
           search-forward
           string-equal
           stringp
           substring
           xml-parse-concat)))
    (elp-instrument-list)))

(defsubst xml-parse-skip-tag ()
  (cond
   ((eq (char-after) ??)
    (search-forward "?>"))
   ((looking-at "!--")
    (search-forward "-->"))
   (t                                   ; must be <!...>
    (re-search-forward "[[>]")
    (if (eq (char-before) ?\[)
        (let ((depth 1))
          (while (and (> depth 0)
                      (if (re-search-forward "[][]")
                          t
                        (error "Pos %d: Unclosed open bracket in
  <! tag"
                               (point))))
            (if (eq (char-before) ?\[)
                (setq depth (1+ depth))
              (setq depth (1- depth))))
          (search-forward ">"))))))

(defsubst xml-parse-add-non-ws (text lst)
  (let ((i 0) (l (length text)) non-ws)
    (while (< i l)
      (unless (memq (aref text i) '(?\n ?\t ? ))
        (setq i l non-ws t))
      (setq i (1+ i)))
    (if (not non-ws)
        lst
      (setcdr lst (list text))
      (cdr lst))))

(defsubst xml-parse-concat (beg end lst)
  "Add the string from BEG to END to LST, ignoring pure whitespace."
  (save-excursion
    (goto-char beg)
    (while (search-forward "<" end t)
      (setq lst (xml-parse-add-non-ws
                 (buffer-substring-no-properties beg (1- (point))) lst)
            beg (1- (point)))
      (xml-parse-skip-tag)
      (setq lst (xml-parse-add-non-ws
                 (buffer-substring-no-properties beg (point)) lst)
            beg (point)))
    (if (/= beg end)
        (setq lst (xml-parse-add-non-ws
                   (buffer-substring-no-properties beg end) lst)))
    lst))

(defun xml-parse-read (&optional inner-p)
  (let ((beg (search-forward "<" nil t)) after)
    (while (and beg (memq (setq after (char-after)) '(?! ??)))
      (xml-parse-skip-tag)
      (setq beg (search-forward "<" nil t)))
    (when beg
      (if (eq after ?/)
          (progn
            (search-forward ">")
            (cons (1- beg)
                  (buffer-substring-no-properties (1+ beg) (1- (point)))))
        (skip-chars-forward "^ \t\n/>")
        (cons
         (1- beg)
         (progn
           (setq after (point))
           (skip-chars-forward " \t\n")
           (let* ((single (eq (char-after) ?/))
                  (tag (buffer-substring-no-properties beg after))
                  attrs data-beg data)
             ;; handle the attribute list, if present
             (cond
              (single
               (skip-chars-forward " \t\n/>"))
              ((eq (char-after) ?\>)
               (forward-char 1))
              (t
               (let* ((attrs (list t))
                      (lastattr attrs)
                      (end (search-forward ">")))
                 (goto-char after)
                 (while (re-search-forward
                         "\\([^ \t\n=]+\\)=\"\\([^\"]+\\)\"" end t)
                   (let ((attr (cons (match-string-no-properties 1)
                                     (match-string-no-properties 2))))
                     (setcdr lastattr (list attr))
                     (setq lastattr (cdr lastattr))))
                 (goto-char end)
                 (setq tag (cons tag (cdr attrs))
                       single (eq (char-before (1- end)) ?/)))))
             ;; return the tag and its data
             (if single
                 (list tag)
               (setq tag (list tag))
               (let ((data-beg (point)) (tag-end (last tag)))
                 (while (and (setq data (xml-parse-read t))
                             (not (stringp (cdr data))))
                   (setq tag-end (xml-parse-concat data-beg (car data)
                                                   tag-end)
                         data-beg (point))
                   (setcdr tag-end (list (cdr data)))
                   (setq tag-end (cdr tag-end)))
                 (xml-parse-concat data-beg (or (car data)
                                                (point-max)) tag-end)
                 tag)))))))))

(provide 'xml-parse)

;;; xml-parse.el ends here