File: strmat.dats

package info (click to toggle)
ats2-lang 0.1.3-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 28,136 kB
  • ctags: 20,441
  • sloc: ansic: 354,696; makefile: 2,679; sh: 638
file content (171 lines) | stat: -rw-r--r-- 4,849 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
(*
// An implementation of string matching in continuation-passing style.
// The code is translated by Hongwei Xi from an earlier version in DML,
// which was originally adopted by him from a version by Frank Pfenning
// (fp+ AT cs DOT cmu DOT edu)
*)
(* ****** ****** *)
//
// Author: Hongwei Xi (May 2007)
//
(* ****** ****** *)
//
// HX-2012-07-20: ported to ATS2
//
(* ****** ****** *)
//
// HX-2012-06-21: compiled to run with ATS/Postiats
//
(* ****** ****** *)
//
#include "share/atspre_staload.hats"
//
(* ****** ****** *)

datatype regexp (int) =
  | Any_char(1) (* any character *)
  | Empty(1) (* empty string matches Empty *)
  | Char(1) of Char (* "c" matches Char (c) *)
  // every char other than "c" matches Char (c)
  | Char_not(1) of Char
  // every char in [c1, c2] matches Chars (c1, c2)
  | Chars(1) of (Char, Char)
  // every char not in [c1, c2] matches Chars (c1, c2)
  | Chars_not(1) of (Char, Char)
  // cs matches Alt(p1, p2) if cs matches either p1 or p2
  | {i,j:nat} Alt(i+j+1) of (regexp i, regexp j)
  // cs matches Seq(p1, p2) if a prefix of cs matches p1 and the rest matches p2
  | {i,j:nat} Seq(i+j+1) of (regexp i, regexp j)
  // cs matches Star(p) if cs matches some, possibly 0, copies of p
  | {i:nat} Star(i+1) of regexp i
// end of [regexp]

typedef Regexp = [i:nat] regexp i

// Note that [acc] is verified to be terminating!

sortdef two = {a:nat | a < 2}

fun acc
  {i0,n,i:nat}
  {b:two | i+b <= i0}
  .<n,i>. ( // metric for termination verification
  cs0: string i0, i0: int i0, p: regexp n, i: int i, b: int b,
  k: {i':nat; b':two | i'+b' <= i+b} (int i', int b') -<cloref1> Bool
) : Bool = let
(*
  val () = (print "acc: enter"; print_newline ())
*)
in
//
case+ p of
| Any_char () => if i > 0 then k (i-1, 1) else false

| Empty () => k (i, b)

| Char c => (
    if i > 0 then (if c = cs0[i0-i] then k (i-1, 1) else false) else false
  ) // end of [Char]

| Char_not c => (
    if i > 0 then (if c <> cs0[i0-i] then k (i-1, 1) else false) else false
  ) // end of [Char_not]

| Chars (c1, c2) => (
    if i > 0 then let
      val c = cs0[i0-i]
    in
      if c1 <= c then (if c <= c2 then k (i-1, 1) else false) else false
    end else false // end of [if]
  ) // end of [Chars]

| Chars_not (c1, c2) => (
    if i > 0 then let
      val c = cs0[i0-i]
    in 
      if c < c1 then k (i-1, 1) else if c > c2 then k (i-1, 1) else false
    end else false
  ) // end of [Chars_not]

| Alt (p1, p2) => (
    if acc (cs0, i0, p1, i, b, k) then true else acc (cs0, i0, p2, i, b, k)
  ) // end of [Alt]

| Seq (p1, p2) => (
    acc (cs0, i0, p1, i, b, lam (i', b') => acc (cs0, i0, p2, i', b', k))
  ) // end of [Seq]

| Star p0 => (
    if k (i, b) then true
    else (
      acc (cs0, i0, p0, i, 0,
        lam (i', b') => if b' = 0 then false else acc (cs0, i0, p, i', 1, k)
      ) // end of [acc]
    ) // end of [if]
  ) // end of [Star]
//
end // end of [acc]

extern fun accept (cs0: String, p: Regexp): Bool

implement
accept (cs0, p) = let
  val i0 = g1uint2int (string_length cs0)
in
  acc (cs0, i0, p, i0, 0, lam (i, _) => i = 0)
end // end of [accept]

//
// HX: some tests
//

val regexp_digit = Chars ('0', '9') 
val regexp_digits = Star (Chars ('0', '9'))

val regexp_uint =
  Alt (Char '0', Seq (Chars ('1', '9'), regexp_digits))

val regexp_int =
  Alt (regexp_uint, Seq (Alt (Char '-', Char '+'), regexp_uint))

val regexp_dot_sats =
  Seq (Star Any_char, Seq (Char '.', Seq (Char 's', Seq (Char 'a', Seq (Char 't', Char 's')))))

val regexp_dot_dats =
  Seq (Star Any_char, Seq (Char '.', Seq (Char 'd', Seq (Char 'a', Seq (Char 't', Char 's')))))

implement
main (argc, argv) = let
  val ans10 = accept ("123456789", regexp_int)
  val () = assertloc (ans10)
  val ans11 = accept ("+123456789", regexp_int)
  val () = assertloc (ans11)
  val ans12 = accept ("-123456789", regexp_int)
  val () = assertloc (ans12)
  val ans20 = accept ("abcde", regexp_int)
  val () = assertloc (~ans20)
  val ans31 = accept ("abcde.sats", regexp_dot_sats)
  val () = assertloc (ans31)
  val ans32 = accept ("abcde.sats", regexp_dot_dats)
  val () = assertloc (~ans32)
  val ans41 = accept ("abcde.dats", regexp_dot_sats)
  val () = assertloc (~ans41)
  val ans42 = accept ("abcde.dats", regexp_dot_dats)
  val () = assertloc (ans42)
in
(*
  print ("ans10(true) = "); print ans10; print_newline ();
  print ("ans11(true) = "); print ans11; print_newline ();
  print ("ans12(true) = "); print ans12; print_newline ();
  print ("ans20(false) = "); print ans20; print_newline ();
  print ("ans31(true) = "); print ans31; print_newline ();
  print ("ans32(false) = "); print ans32; print_newline ();
  print ("ans41(false) = "); print ans41; print_newline ();
  print ("ans42(true) = "); print ans42; print_newline ();
*)
  0(*normal*)
end // end of [main]

(* ****** ****** *)

(* end of [strmat.dats] *)