File: coq_lex.mll

package info (click to toggle)
coq 8.16.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 40,596 kB
  • sloc: ml: 219,376; sh: 3,545; python: 3,231; ansic: 2,529; makefile: 767; lisp: 279; javascript: 63; xml: 24; sed: 2
file content (148 lines) | stat: -rw-r--r-- 4,712 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
(************************************************************************)
(*         *   The Coq Proof Assistant / The Coq Development Team       *)
(*  v      *         Copyright INRIA, CNRS and contributors             *)
(* <O___,, * (see version control and CREDITS file for authors & dates) *)
(*   \VV/  **************************************************************)
(*    //   *    This file is distributed under the terms of the         *)
(*         *     GNU Lesser General Public License Version 2.1          *)
(*         *     (see LICENSE file for the text of the license)         *)
(************************************************************************)

{
  exception Unterminated

  let utf8_adjust = ref 0

  let utf8_lexeme_start lexbuf =
    Lexing.lexeme_start lexbuf - !utf8_adjust
}

let space = [' ' '\n' '\r' '\t' '\012'] (* '\012' is form-feed *)

let number = [ '0'-'9' ]+

let string = "\"" _+ "\""

let alpha = ['a'-'z' 'A'-'Z']

let ident = alpha (alpha | number | '_' | "'")*

let undotted_sep = ((number | '[' ident ']') space* ':' space*)? '{' | '}' | '-'+ | '+'+ | '*'+

let vernac_control = "Fail" | "Time" | "Redirect" space+ string | "Timeout" space+ number

let dot_sep = '.' (space | eof)

let utf8_extra_byte = [ '\x80' - '\xBF' ]

rule coq_string = parse
  | "\"\"" { coq_string lexbuf }
  | "\"" { () }
  | eof { () }
  | utf8_extra_byte { incr utf8_adjust; coq_string lexbuf }
  | _ { coq_string lexbuf }

and comment = parse
  | "(*" { let _  = comment lexbuf in comment lexbuf }
  | "\"" { let () = coq_string lexbuf in comment lexbuf }
  | "*)" { Some (utf8_lexeme_start lexbuf) }
  | eof { None }
  | utf8_extra_byte { incr utf8_adjust; comment lexbuf }
  | _ { comment lexbuf }

and quotation n l = parse
| eof { raise Unterminated }
| utf8_extra_byte { incr utf8_adjust; quotation n l lexbuf }
| "{" { quotation_nesting n l 1 lexbuf }
| "}" { quotation_closing n l 1 lexbuf }
| _ { quotation n l lexbuf }

and quotation_nesting n l v = parse
| eof { raise Unterminated }
| utf8_extra_byte { incr utf8_adjust; quotation n l lexbuf }
| "{" {
  if n = v+1 then quotation n (l+1) lexbuf
  else quotation_nesting n l (v+1) lexbuf
}
| "}" { quotation_closing n l 1 lexbuf }
| _ { quotation n l lexbuf }

and quotation_closing n l v = parse
| eof { raise Unterminated }
| utf8_extra_byte { incr utf8_adjust; quotation n l lexbuf }
| "}" {
  if n = v+1 then
    if l = 1 then ()
    else quotation n (l-1) lexbuf
  else quotation_closing n l (v+1) lexbuf
}
| "{" { quotation_nesting n l 1 lexbuf }
| _ { quotation n l lexbuf }

and quotation_start n = parse
| eof { raise Unterminated }
| utf8_extra_byte { incr utf8_adjust; quotation n 1 lexbuf }
| "{" { quotation_start (n+1) lexbuf }
| _ { quotation n 1 lexbuf }

(** NB : [mkiter] should be called on increasing offsets *)

and sentence initial stamp = parse
  | "(*" {
      match comment lexbuf with
        | None -> raise Unterminated
        | Some comm_last ->
          stamp comm_last Tags.Script.comment;
          sentence initial stamp lexbuf
    }
  | "\"" {
      let () = coq_string lexbuf in
      sentence false stamp lexbuf
    }
  | ".." {
      (* We must have a particular rule for parsing "..", where no dot
	 is a terminator, even if we have a blank afterwards
	 (cf. for instance the syntax for recursive notation).
	 This rule and the following one also allow to treat the "..."
	 special case, where the third dot is a terminator. *)
      sentence false stamp lexbuf
    }
  | dot_sep {
      (* The usual "." terminator *)
      stamp (utf8_lexeme_start lexbuf) Tags.Script.sentence;
      sentence true stamp lexbuf
    }
  | (vernac_control space+)* undotted_sep {
      (* Separators like { or } and bullets * - + are only active
	 at the start of a sentence *)
      if initial then stamp (utf8_lexeme_start lexbuf + String.length (Lexing.lexeme lexbuf) - 1) Tags.Script.sentence;
      sentence initial stamp lexbuf
    }
  | ['a'-'z' 'A'-'Z'] ":{{" {
      quotation_start 2 lexbuf;
      sentence false stamp lexbuf
    }
  | space+ {
       (* Parsing spaces is the only situation preserving initiality *)
       sentence initial stamp lexbuf
    }
  | utf8_extra_byte { incr utf8_adjust; sentence false stamp lexbuf }
  | eof { if initial then () else raise Unterminated }
  | _ {
      (* Any other characters *)
      sentence false stamp lexbuf
    }

{

  (** Parse sentences in string [slice], tagging last characters
      of sentences with the [stamp] function.
      It will raise [Unterminated] if [slice] ends with an unfinished
      sentence.
  *)

  let delimit_sentences stamp slice =
    utf8_adjust := 0;
    sentence true stamp (Lexing.from_string slice)

}