1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
(** Splitting a string according to a character separator
This problem was suggested by Georges-Axel Jaloyan (Amazon Web Services),
during a talk at the first Dafny workshop (London, 2024).
The problem is stated as follows: split a string `s` according to a
character separator `sep`, as a sequence of substrings `[s0;s1;...sn]`
where
- the separator does not appear in any `si`;
- the concatenation `s0+sep+s1+sep+...+sep+sn` is equal to `s`.
Examples. Assuming that the separator character is '.', here are some
expected input/output:
+---------+-------------------+
| input | output |
+---------+-------------------+
| "" | [""] |
| "." | ["", ""] |
| ".." | ["", "", ""] |
| "abc" | ["abc"] |
| "abc." | ["abc", ""] |
| ".abc" | ["", "abc"] |
| "a.bc" | ["a", "bc"] |
| "a..bc" | ["a", "", "bc"] |
+---------+-------------------+
Note that the output sequence is never empty.
Additionnally, we possibly set a limit to the total number of
substrings in the output. We do so with an integer parameter `limit`.
If `limit = -1`, there is no limit. Otherwise, `limit >= 1`.
When a limit is set, the last substring in the output may contain the
separator.
Examples. Assuming that the separator character is '.' and the limit
is 2, here are some expected input/output:
+-----------+----------------+
| input | output |
+-----------+----------------+
| "a." | ["a", ""] |
| "b.." | ["b", "."] |
| "b..." | ["b", ".."] |
| "ba.b.b." | ["ba", "b.b."] |
+-----------+----------------+
Author: Jean-Christophe Filliâtre (CNRS)
*)
module SplitString
use int.Int
use seq.Seq
use seq.FreeMonoid
use seq.Mem
(** Characters and strings *)
type char
val (=) (x y: char) : bool
ensures { result <-> x=y }
type string_ = seq char
(** Specification *)
(** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
for a non-empty sequence of strings *)
let rec function concat (ss: seq string_) (sep: char) : string_
requires { length ss >= 1 } variant { length ss }
= if length ss = 1 then ss[0]
else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1])
(** a mere shortcut, for convenience *)
predicate notin (sep: char) (s: string_)
= not (mem sep s)
(** Code *)
let ([]) (s: string_) (i: int) : char
requires { [@expl:index in string bounds] 0 <= i < length s }
= get s i
let split_string (s: string_) (sep: char) (limit: int) : (ss: seq string_)
requires { limit = -1 || limit >= 1 }
ensures { length ss >= 1 }
ensures { limit = -1 || length ss <= limit }
ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
ensures { length ss = limit || notin sep ss[length ss - 1] }
ensures { concat ss sep == s }
= if limit = 1 then return (singleton s);
let ref ss = empty in
let ref i = 0 in
let ref b = 0 in
while i < length s do
invariant { 0 <= b <= i <= length s }
invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
invariant { forall j. b <= j < i -> s[j] <> sep }
invariant { limit = -1 || length ss < limit-1 }
invariant { concat (snoc ss s[b..]) sep == s }
variant { length s - i }
if s[i] = sep then (
ss <- snoc ss s[b..i];
assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
if length ss = limit-1 then return snoc ss s[i+1..];
b <- i+1;
);
i <- i+1
done;
return snoc ss s[b..]
end
module SplitStringOCaml
use int.Int
use seq.Seq
use seq.FreeMonoid
use seq.Mem
use mach.int.Int63
use mach.peano.Peano
use mach.peano.Int63
use queue.Queue as Q
(** Characters and strings *)
type char
val (=) (x y: char) : bool
ensures { result <-> x=y }
type string_ = private {
ghost contents_: seq char;
}
meta coercion function contents_
(** Specification *)
(** `concat [s0;s1;...;sn] sep` is `s0+sep+s1+sep+...+sep+sn`
for a non-empty sequence of strings *)
let rec ghost function concat (ss: seq string_) (sep: char) : seq char
requires { length ss >= 1 } variant { length ss }
= if length ss = 1 then ss[0].contents_
else concat ss[0..length ss - 1] sep ++ (cons sep ss[length ss - 1].contents_)
(** a mere shortcut, for convenience *)
predicate notin (sep: char) (s: string_)
= not (mem sep s)
(** Code *)
val length (s: string_) : int63
ensures { result = length s }
val sub (s: string_) (i j: int63) : string_
requires { [@expl:index in string bounds] 0 <= i <= j <= length s }
ensures { result = s[i..j] }
val ([]) (s: string_) (i: int63) : char
requires { [@expl:index in string bounds] 0 <= i < length s }
ensures { result = s[i] }
let partial split_string (s: string_) (sep: char) (limit: int63) : (ss: Q.t string_)
requires { limit = -1 || limit >= 1 }
ensures { length ss >= 1 }
ensures { limit = -1 || length ss <= limit }
ensures { forall j. 0 <= j < length ss - 1 -> notin sep ss[j] }
ensures { length ss = limit || notin sep ss[length ss - 1] }
ensures { concat ss sep == s }
= let ref ss = Q.create () in
if limit = (1:int63) then (Q.push s ss; return ss);
let ref i = 0: int63 in
let ref b = 0: int63 in
let ghost ref suffix = s in
while i < length s do
invariant { 0 <= b <= i <= length s }
invariant { forall j. 0 <= j < length ss -> notin sep ss[j] }
invariant { forall j. b <= j < i -> s[j] <> sep }
invariant { limit = -1 || length ss < limit-1 }
invariant { suffix = s[b..] }
invariant { concat (snoc ss suffix) sep == s }
variant { length s - i }
if s[i] = sep then (
Q.push (sub s b i) ss;
assert { s[b..] == s[b..i] ++ cons sep s[i+1..] };
if to_int63 (Q.length ss) = limit - (1:int63) then
(Q.push (sub s (i+1) (length s)) ss; return ss);
b <- i+1;
suffix <- sub s b (length s)
);
i <- i+1
done;
Q.push (sub s b (length s)) ss;
return ss
end
|