File: ada_trees-scanner.ads

package info (click to toggle)
asis 2015-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 13,640 kB
  • sloc: ada: 140,372; makefile: 260; sh: 50; xml: 48; csh: 10
file content (283 lines) | stat: -rw-r--r-- 12,658 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
------------------------------------------------------------------------------
--                                                                          --
--                            GNAT2XML COMPONENTS                           --
--                                                                          --
--                      G N A T 2 X M L . S C A N N E R                     --
--                                                                          --
--                                 S p e c                                  --
--                                                                          --
--                  Copyright (C) 2012-2014, AdaCore, Inc.                  --
--                                                                          --
-- Gnat2xml is free software; you can redistribute it and/or modify it      --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software Foundation;  either version 2,  or  (at your option)  any later --
-- version. Gnat2xml is distributed  in the hope  that it will be useful,   --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MER-      --
-- CHANTABILITY or  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General  --
-- Public License for more details. You should have received a copy of the  --
-- GNU General Public License distributed with GNAT; see file COPYING. If   --
-- not, write to the Free Software Foundation, 59 Temple Place Suite 330,   --
-- Boston, MA 02111-1307, USA.                                              --
-- The gnat2xml tool was derived from the Avatox sources.                   --
------------------------------------------------------------------------------

pragma Ada_2012;

with ASIS_UL.Vectors;

with Ada_Trees.Buffers; use Ada_Trees.Buffers;
use Ada_Trees.Buffers.Marker_Vectors;
--  use all type Ada_Trees.Buffers.Marker_Vector;

package Ada_Trees.Scanner is

   --  This package provides a simple lexical scanner for Ada tokens. There are
   --  some unusual things about this scanner:
   --
   --     We don't distinguish most of the different kinds of tokens; most are
   --     lumped together under the Lexeme kind, and reserved words are lumped
   --     together under Reserved_Word. We only distinguish where we need to.
   --
   --     We do not ignore comments; a comment is considered to be a token.
   --
   --     We do not ignore blank lines. We do ignore a single line break,
   --     if Ignore_Single_Line_Breaks is True. Other whitespace (blanks and
   --     tabs) between tokens is always ignored.
   --
   --     We don't check for errors, because we're in ASIS, where Ada code is
   --     known to be legal.

   type Token_Kind is
     (Nil,
      Start_Of_Input,
      End_Of_Input,
      Identifier,
      Reserved_Word,
      String_Literal,
      Numeric_Literal,
      Lexeme, -- misc lexemes as defined in the RM
      Pp_Off_Comment,
      --  A whole-line comment that matches the --pp-off string
      Pp_On_Comment,
      --  A whole-line comment that matches the --pp-on string
      Other_Whole_Line_Comment,
   --  A comment that appears by itself on a line. Multiple comments that may
   --  be filled as a "paragraph" are combined into a single Whole_Line_Comment
   --  token. This comment is a Whole_Line_Comment.
      End_Of_Line_Comment,
   --  A comment that appears at the end of a line, after some other
   --  program text. The above comment starting "misc lexemes" is an
   --  End_Of_Line_Comment.
      End_Of_Line, -- First in a series of one or more NLs.
      Blank_Line); -- Second, third, ... in a series of one or more NLs.

   subtype Whole_Line_Comment is Token_Kind with
     Static_Predicate => Whole_Line_Comment in
       Pp_Off_Comment | Pp_On_Comment | Other_Whole_Line_Comment;

   subtype Comment_Kind is Token_Kind with
        Predicate => Comment_Kind in Whole_Line_Comment | End_Of_Line_Comment;

   subtype Pp_Off_On_Comment is Token_Kind with
        Predicate => Pp_Off_On_Comment in Pp_Off_Comment | Pp_On_Comment;

   type Source_Location is record
      Line, Col : Positive; -- 1-based line and column numbers
      First     : Positive;
      Last      : Natural;

      Firstx, Lastx : Marker;
      --  ???Same information as First&Last. These should replace First&Last
      --  eventually. Note that Lastx points one past the last character.
   end record;

   function First_Pos (Input : Buffer; Sloc : Source_Location) return Positive;
   function Last_Pos (Input : Buffer; Sloc : Source_Location) return Natural;
   --  Absolute position in Input (parameter of Get_Tokens) of the start and
   --  end of the token. So the text of the token is exactly equal to the slice
   --  Input (First..Last). Note that Input'First might not be 1.

   function Image
     (Sloc : Source_Location)
      return String is
     (Image (Sloc.Line) &
      ":" &
      Image (Sloc.Col) &
      "(" &
      Image (Sloc.First) &
      ".." &
      Image (Sloc.Last) &
      ")");

   function Message_Image
     (Sloc : Source_Location) return String is
     (Image (Sloc.Line) &
      ":" &
      Image (Sloc.Col));

   function Message_Image
     (Tree : Ada_Tree; Sloc : Source_Location) return String is
      --  Tree is the A_Compilation_Unit node
     (Get_Name_String (Tree.Source_File) &
      ":" &
      Image (Sloc.Line) &
      ":" &
      Image (Sloc.Col));

   type Token is record
      Kind : Token_Kind := Nil;

      Text : Name_Id;
      --  The text of the token as it appears in the source, with these
      --  exceptions and clarifications:
      --
      --  Start_Of_Input and End_Of_Input have Text = "".
      --
      --  For Blank_Line: does not include the text of the preceding
      --  End_Of_Line or Blank_Line (i.e. it is usually just LF, but could
      --  be CR/LF -- not LF,LF nor CR,LF,CR,LF).
      --
      --  For comments, the text of the comment excluding the initial "--"
      --  and leading and trailing blanks, and followed by an extra NL. For
      --  multi-line comment "paragraphs", used for filling, NL terminates each
      --  line. The NL at the end isn't really part of the comment; the next
      --  token in the stream will be End_Of_Line. The reason for the extra NL
      --  is that GNATCOLL.Paragraph_Filling expects it, so it's simpler and
      --  more efficient this way.

      Normalized : Name_Id;
      --  Same as Text, or converted to lower case, depending on the Kind.
      --  Comments have Normalized = No_Name, so we can detect specific
      --  reserved words. For example, the "BEGIN" reserved word will have Text
      --  = "BEGIN" and Normalized = "begin". The comment "-- begin" will have
      --  Text = "begin" and Normalized = No_Name.

      Leading_Blanks : Natural;
      --  For comments, the number of leading blanks, which are blanks after
      --  the initial "--" and before any nonblank characters. For other
      --  tokens, zero.

      Width : Natural;
      --  For most tokens, this is the width of the token, i.e. the same as
      --  Sloc.Last-Sloc.First+1, and the same as the length of Text. For
      --  multi-line comments, this is the width of the widest line. For all
      --  comments, the initial "--" and any leading blanks are included, but
      --  the NL's are not.

      Is_Special_Comment : Boolean;
      --  True if this is a "special" comment; that is, one that should not be
      --  formatted in any way. False for other comments and for non-comments.

      Is_Fillable_Comment : Boolean;
      --  True if this is a fillable comment; that is, one that should be
      --  filled if filling is turned on. False for other comments and for
      --  non-comments. Special comments are not fillable; Is_Special_Comment
      --  implies not Is_Fillable_Comment.

      Sloc : Source_Location;
   end record;

   type Token_Index is new Positive;
   type Token_Array is array (Token_Index range <>) of Token;
   package Token_Vectors is new ASIS_UL.Vectors
     (Token_Index,
      Token,
      Token_Array);
   subtype Token_Vector is Token_Vectors.Vector;
   type Token_Vector_Ptr is access all Token_Vector;
   use Token_Vectors;
   --  use all type Token_Vector;

   function Line_Length
     (Input    : in out Buffer;
      Ends     : Marker_Vector;
      Line_Num : Positive)
      return     Natural;
   --  Doesn't count the NL character. This doesn't work for CR/LF line
   --  endings, which is OK, because we only use it for internally-generated
   --  text that always uses a single NL.

   Default_Pp_Off_String : aliased constant W_Str := "--!pp off";
   Default_Pp_On_String : aliased constant W_Str := "--!pp on";

   type Pp_Off_On_Delimiters_Rec is record
      Off : access constant W_Str := Default_Pp_Off_String'Access;
      On : access constant W_Str := Default_Pp_On_String'Access;
      --  Text of comments for turning pretting printing off and on, including
      --  the leading '--'. For example, if the user specified --pp-off='pp-',
      --  then Off will be "--pp-". A whole-line comment of the form "--pp-"
      --  will disable pretty printing.
      --  We do not want these comments to be fillable.
   end record;

   Gen_Plus : constant W_Str := "--gen+"; -- (style) two spaces required
   Gen_Minus : constant W_Str := "--gen-";
   --  Strings to mark start and end of automatically generated code.

   procedure Get_Tokens
     (Input                     : in out Buffer;
      Result                    : out Token_Vectors.Vector;
      Pp_Off_On_Delimiters      : Pp_Off_On_Delimiters_Rec;
      Ignore_Single_Line_Breaks : Boolean           := True;
      Max_Tokens                : Token_Index       := Token_Index'Last;
      Line_Ends                 : Marker_Vector_Ptr := null;
      Gen_Regions               : Token_Vector_Ptr  := null);
   --  Return in Result the sequence of tokens in the Input string. The
   --  first one is always Start_Of_Input, and the last one End_Of_Input.
   --  Ignore_Single_Line_Breaks means we should skip any End_Of_Line tokens
   --  (but not Blank_Lines). Max_Tokens places a limit on the number of tokens
   --  (not counting Start_Of_Input); we quit before reaching end of input if
   --  we've gotten that many.
   --
   --  If Line_Ends is non-null, we compute all the line endings in
   --  Line_Ends.all, which is a mapping from line numbers to Markers in the
   --  Input string. Each element points to a NL character in the corresponding
   --  buffer.
   --
   --  Comments starting with Gen_Plus and Gen_Minus, and tokens in between, do
   --  not appear in Result. If Gen_Regions is non-null, we use it to return
   --  the sequence of Gen_Plus and Gen_Minus tokens.  The generated code is in
   --  the slices Gen_Regions(1).Sloc..Gen_Regions(2).Sloc,
   --  Gen_Regions(3).Sloc..Gen_Regions(4).Sloc, and so on.

   function Next_Lexeme
     (Tokens : Token_Vectors.Vector;
      Index  : Token_Index)
      return   Token;
   --  Returns the next token after Index that is not a blank line or comment

   function Prev_Lexeme
     (Tokens : Token_Vectors.Vector;
      Index  : Token_Index)
      return   Token;
   --  Returns the previous token before Index that is not a blank line or
   --  comment

   function Get_Token (Input : W_Str) return Token;
   --  Get just one token, ignoring single line breaks

   procedure Check_Same_Tokens (X, Y : Token_Vectors.Vector);
   --  Checks that X and Y are the same except for Slocs and line breaks; raise
   --  an exception if not.

   function In_Gen_Regions
     (Line : Positive; Gen_Regions : Token_Vector) return Boolean;
   --  True if the line number is within one of the regions of Gen_Regions.
   --  The comments are always on a line by themselves, so we don't have to
   --  worry about column numbers.

   procedure Put_Token (Tok : Token; Index : Token_Index := 1);
   procedure Put_Tokens
     (Tokens    : Token_Vectors.Vector;
      First     : Token_Index'Base := 1;
      Last      : Token_Index'Base := Token_Index'Last;
      Highlight : Token_Index'Base := 0);
   --  Put token(s) to standard output (even if Text_IO.Current_Output has been
   --  redirected). The tokens come out in compilable form, one per line, with
   --  the text of the token first, and the other information commented out.
   --  This one-token-per line code can be used for testing the scanner -- it
   --  should have identical semantics to the original Ada code. First and Last
   --  indicate a slice of Tokens, and we tolerate out-of-bounds indices.
   --  We draw a comment line before Highlight.

end Ada_Trees.Scanner;