File: strtok.lisp

package info (click to toggle)
acl2 8.3dfsg-2
  • links: PTS
  • area: main
  • in suites: bullseye
  • size: 309,408 kB
  • sloc: lisp: 3,311,842; javascript: 22,569; cpp: 9,029; ansic: 7,872; perl: 6,501; xml: 3,838; java: 3,738; makefile: 3,383; ruby: 2,633; sh: 2,489; ml: 763; python: 741; yacc: 721; awk: 260; csh: 186; php: 171; lex: 154; tcl: 49; asm: 23; haskell: 17
file content (131 lines) | stat: -rw-r--r-- 5,020 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
; ACL2 String Library
; Copyright (C) 2009-2013 Centaur Technology
;
; Contact:
;   Centaur Technology Formal Verification Group
;   7600-C N. Capital of Texas Highway, Suite 300, Austin, TX 78731, USA.
;   http://www.centtech.com/
;
; License: (An MIT/X11-style license)
;
;   Permission is hereby granted, free of charge, to any person obtaining a
;   copy of this software and associated documentation files (the "Software"),
;   to deal in the Software without restriction, including without limitation
;   the rights to use, copy, modify, merge, publish, distribute, sublicense,
;   and/or sell copies of the Software, and to permit persons to whom the
;   Software is furnished to do so, subject to the following conditions:
;
;   The above copyright notice and this permission notice shall be included in
;   all copies or substantial portions of the Software.
;
;   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
;   DEALINGS IN THE SOFTWARE.
;
; Original author: Jared Davis <jared@centtech.com>

(in-package "STR")
(include-book "cat")
(include-book "eqv")
(include-book "misc/definline" :dir :system)  ;; bozo
(local (include-book "arithmetic"))
(local (include-book "std/lists/revappend" :dir :system))

(defsection strtok-aux
  :parents (strtok)
  :short "Fast implementation of @(see strtok)."

  ;; Tail-recursive worker that scans X one character at a time from index N
  ;; up to (but not including) XL, splitting on any character found in
  ;; DELIMITERS.  Completed tokens are consed onto the front of ACC, so the
  ;; returned list holds the tokens in the REVERSE of the order in which they
  ;; appear in X; the caller is expected to reverse it.  A token is only
  ;; emitted when CURR is non-empty, so runs of adjacent delimiters never
  ;; produce empty strings.
  ;;
  ;; DEFUND leaves the definition disabled; it is enabled locally just below
  ;; so that the theorems in this section can open it up.
  (defund strtok-aux (x n xl delimiters curr acc)
    ;; x is the string we're tokenizing, xl is its length
    ;;   (strictly, the guard only requires xl <= (length x), so xl acts as
    ;;   the exclusive index at which scanning stops)
    ;; n is our current position in x
    ;; delimiters are the list of chars to split on
    ;; curr is the current word we're accumulating in reverse order
    ;; acc is the string list of previously found words
    (declare (type string x)
             (type (integer 0 *) n xl)
             (xargs :guard (and (stringp x)
                                (natp xl)
                                (natp n)
                                (<= xl (length x))
                                (<= n xl)
                                (character-listp delimiters)
                                (character-listp curr)
                                (string-listp acc))
                    ;; The distance left to scan; it shrinks on every
                    ;; recursive call because n is incremented by one.
                    :measure (nfix (- (nfix xl) (nfix n)))))
    ;; Termination test.  The :logic form is written with NFIX/ZP so that it
    ;; matches the measure without needing any hypotheses about n and xl; the
    ;; :exec form is the plain integer comparison actually run, which agrees
    ;; with it when the guard (naturals with n <= xl) holds.
    (if (mbe :logic (zp (- (nfix xl) (nfix n)))
             :exec (int= n xl))
        ;; End of input: flush the word in progress, if there is one.
        ;; rchars-to-string is defined outside this file -- presumably it turns
        ;; the reversed character list back into a string in reading order.
        (if curr
            (cons (rchars-to-string curr) acc)
          acc)
      (let* ((char1  (char x n))
             ;; Non-nil exactly when the current character is a delimiter.
             (matchp (member char1 delimiters)))
        (strtok-aux (the string x)
                    ;; lnfix: presumably NFIX in the logic and a no-op in
                    ;; execution, keeping the argument a natural without
                    ;; needing the guard.
                    (the (integer 0 *) (+ 1 (lnfix n)))
                    (the integer xl)
                    delimiters
                    ;; A delimiter ends the current word (reset to nil);
                    ;; any other character is pushed onto it.
                    (if matchp nil (cons char1 curr))
                    ;; On a delimiter, emit the word in progress -- but only
                    ;; if it is non-empty, so no empty tokens are produced.
                    (if (and matchp curr)
                        (cons (rchars-to-string curr) acc)
                      acc)))))

  ;; Open up the (otherwise disabled) definition for the proofs below; being
  ;; LOCAL, this does not affect the theory seen outside this section.
  (local (in-theory (enable strtok-aux)))

  ;; The result is a true list whenever the incoming accumulator is.
  (defthm true-listp-of-strtok-aux
    (implies (true-listp acc)
             (true-listp (strtok-aux x n xl delimiters curr acc)))
    :hints(("Goal" :induct (strtok-aux x n xl delimiters curr acc))))

  ;; The result is a list of strings whenever the incoming accumulator is.
  (defthm string-listp-of-strtok-aux
    (implies (string-listp acc)
             (string-listp (strtok-aux x n xl delimiters curr acc)))
    :hints(("Goal" :induct (strtok-aux x n xl delimiters curr acc))))

  ;; Congruence on argument 1: replacing x by any STREQV-equivalent value
  ;; yields an EQUAL result.
  (defcong streqv equal (strtok-aux x n xl delimiters curr acc) 1))



(defsection strtok
  :parents (std/strings)
  :short "Tokenize a string with character delimiters."

  :long "<p>@(call strtok) splits the string @('x') into a list of substrings,
based on @('delimiters'), a list of characters.  This is somewhat similar to
repeatedly calling the @('strtok') function in C.</p>

<p>As an example:</p>

@({
 (strtok \"foo bar, baz!\" (list #\\Space #\\, #\\!))
   -->
 (\"foo\" \"bar\" \"baz\")
})

<p>Note that all of the characters in @('delimiters') are removed, and no empty
strings are ever found in @('strtok')'s output.</p>"

  ;; User-facing entry point: runs strtok-aux over the whole of X, starting at
  ;; index 0 with an empty current word and an empty accumulator, then
  ;; reverses the result, because strtok-aux collects tokens in reverse order.
  ;;
  ;; DEFINLINED is not defined in this file -- presumably it introduces an
  ;; inlined function whose definition is left disabled, which is consistent
  ;; with the local ENABLE just below.
  (definlined strtok (x delimiters)
    (declare (xargs :guard (and (stringp x)
                                (character-listp delimiters))))
    ;; Two tricks.
    ;;  - Use REV for better type-prescription
    ;;  - Use LEN of EXPLODE for better congruence
    ;; In both cases the :logic form is the one reasoned about, while the
    ;; :exec form (LENGTH / REVERSE) is what actually runs.
    (let ((rtokens (strtok-aux x 0 (mbe :logic (len (explode x))
                                        :exec (length x))
                               delimiters nil nil)))
      (mbe :logic (rev rtokens)
           :exec (reverse rtokens))))

  ;; Open up the definition for the proofs in this section only.
  (local (in-theory (enable strtok)))

  ;; Helper fact for the theorem below: REV preserves string-listp.
  (local (defthm lemma
           (implies (string-listp x)
                    (string-listp (acl2::rev x)))))

  ;; strtok always returns a list of strings; note there are no hypotheses.
  (defthm string-listp-of-strtok
    (string-listp (strtok x delimiters)))

  ;; Congruence on argument 1: replacing x by any STREQV-equivalent value
  ;; yields an EQUAL result.
  (defcong streqv equal (strtok x delimiters) 1))