File: ad-parse.adb

package info (click to toggle)
adabrowse 4.0.3-5
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 2,364 kB
  • sloc: ada: 29,770; makefile: 137; ansic: 4
file content (654 lines) | stat: -rw-r--r-- 21,988 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
-------------------------------------------------------------------------------
--
--  This file is part of AdaBrowse.
--
-- <STRONG>Copyright (c) 2002 by Thomas Wolf.</STRONG>
-- <BLOCKQUOTE>
--    AdaBrowse is free software; you can redistribute it and/or modify it
--    under the terms of the  GNU General Public License as published by the
--    Free Software  Foundation; either version 2, or (at your option) any
--    later version. AdaBrowse is distributed in the hope that it will be
--    useful, but <EM>without any warranty</EM>; without even the implied
--    warranty of <EM>merchantability or fitness for a particular purpose.</EM>
--    See the GNU General Public License for  more details. You should have
--    received a copy of the GNU General Public License with this distribution,
--    see file "<A HREF="GPL.txt">GPL.txt</A>". If not, write to the Free
--    Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
--    USA.
-- </BLOCKQUOTE>
--
-- <DL><DT><STRONG>
-- Author:</STRONG><DD>
--   Thomas Wolf  (TW)
--   <ADDRESS><A HREF="mailto:twolf@acm.org">twolf@acm.org</A></ADDRESS></DL>
--
-- <DL><DT><STRONG>
-- Purpose:</STRONG><DD>
--   Simplified Ada 95 parser. Parses the source until it finds the
--   name of the library unit declaration. Note: this parser (and its
--   scanner!) doesn't need to be hyper-fast, it'll only be used for
--   krunched file names, and then parse the file only up to the
--   unit name.</DL>
--
-- <!--
-- Revision History
--
--   26-MAR-2002   TW  Initial version.
--   21-JUN-2002   TW  Uses Util.Text now instead of Ada.Strings.Unbounded.
-- -->
-------------------------------------------------------------------------------

pragma License (GPL);

with Ada.Strings.Maps;
with Ada.Text_IO;

with Util.Files.Text_IO;
with Util.Strings;
with Util.Text.Internal;

pragma Elaborate_All (Util.Files.Text_IO);
pragma Elaborate_All (Util.Text);

package body AD.Parse is

   package UT renames Util.Text;

   ----------------------------------------------------------------------------
   --  Scanning routines. This is a very simple, line-based scanner. Not
   --  particularly efficient, but does the job nicely.

   package Scanner is

      --  Interface of the line-based scanner. The scanner keeps a single
      --  global state (one open file, one current token); it is driven by
      --  'Init', then repeated calls to 'Advance', and finally 'Close'.

      type Token is
        (Other_Token,
         Left_Paren_Token, Right_Paren_Token, Semicolon_Token, Period_Token,
         With_Token, Use_Token, Pragma_Token, Type_Token, Package_Token,
         Procedure_Token, Function_Token, Is_Token, New_Token, Return_Token,
         Private_Token, Generic_Token, Name_Token, String_Token);
      --  Token kinds delivered by the scanner. Only the punctuation and the
      --  reserved words listed here are distinguished. Any other word
      --  (including reserved words not listed) is a 'Name_Token', a string
      --  literal is a 'String_Token', and everything else (numbers,
      --  character literals, other delimiters) is an 'Other_Token'.

      procedure Init (File_Name : in String);
      --  Opens 'File_Name' for reading, loads its first line and scans the
      --  first token, so that 'Current_Token' is valid on return.

      procedure Advance;
      --  Scans the next token and makes it the current one.

      function Current_Token return Token;
      --  Returns the kind of the token most recently scanned by 'Advance'.

      function Image   return UT.Unbounded_String;
      --  Returns the text of the current token if it is a 'Name_Token' or a
      --  'String_Token' (for strings including the delimiting double
      --  quotes); returns a null string for all other tokens.

      procedure Close;
      --  Closes the scanned file if it is open; does nothing otherwise.

      Scan_Error : exception;
      --  Raised by the scanner when a loaded line is empty, a string literal
      --  is not terminated, or a numeric literal is malformed.

   private

      --  'Current_Token' only returns a scanner variable; ask for inlining.
      pragma Inline (Current_Token);

   end Scanner;

   package body Scanner is

      use Util.Strings;

      F : Ada.Text_IO.File_Type;
      --  The source file being scanned: opened by 'Init', read through
      --  'Get_Line', and closed by 'Close'.

      function Ada_Skip_String
        (S     : in String;
         Delim : in Character)
        return Natural
      is
         --  Skips a string literal in 'S' that is delimited by 'Delim'.
         --  'Delim' is handed to 'Skip_String' twice; presumably the second
         --  occurrence is the escape character, so that a doubled delimiter
         --  stays inside the string (confirm against Util.Strings).
         --
         --  The result is whatever 'Skip_String' returns. The scanner uses
         --  it as the index of the closing delimiter and treats zero as
         --  "string not terminated".
         Closing_Idx : constant Natural := Skip_String (S, Delim, Delim);
      begin
         return Closing_Idx;
      end Ada_Skip_String;

      function Get_Line is
         new Util.Files.Text_IO.Next_Line
               (Line_Continuation => "",
                Comment_Start     => "--",
                Delimiters        => Ada.Strings.Maps.To_Set ('"'),
                Strings           => Ada_Skip_String);
      --  Line reader of the scanner: an instance of the generic 'Next_Line'
      --  configured for Ada source, i.e. no line continuation, "--" as the
      --  comment introducer, and the double quote as the only string
      --  delimiter (strings are skipped with 'Ada_Skip_String').
      --
      --  Note: we only need to handle the double quote as a string delimiter,
      --  for "--" can only occur in strings, but never in character literals.
      --  Hence it isn't necessary to handle the single quote at all here.

      --  State of the scanner. All of it is global to the package body, so
      --  only one file can be scanned at a time.

      Curr_Line : UT.Unbounded_String;
      --  The line currently being scanned, as delivered by 'Get_Line'.
      Curr      : UT.String_Access;
      --  Direct access to the characters of 'Curr_Line'; refreshed by
      --  'Load_Line' whenever 'Curr_Line' is set.
      Curr_Idx  : Natural;
      --  Index into 'Curr' of the next character to be scanned.

      Curr_Token  : Token := Other_Token;
      --  Kind of the token most recently scanned by 'Advance'.
      Token_Image : UT.Unbounded_String;
      Token_Ptr   : UT.String_Access;
      --  Set for 'Name_Token' and 'String_Token'; in the latter case, it
      --  also contains the delimiting double quotes. 'Token_Ptr' gives direct
      --  access to the characters of 'Token_Image'.

      procedure Load_Line
      is
         --  Fetches the next line from 'F' through 'Get_Line', makes it the
         --  current line and resets the scan position to its first
         --  character. Raises 'Scan_Error' if the line has no characters.
         --  Exceptions raised by 'Get_Line' itself (e.g. at the end of the
         --  file) are propagated unchanged.
      begin
         UT.Set (Curr_Line, Get_Line (F));
         Curr := UT.Internal.Get_Ptr (Curr_Line);
         --  Always restart at the beginning of the freshly loaded line.
         Curr_Idx := 1;
         if Curr'Last < Curr_Idx then
            --  Nothing to scan on this line.
            raise Scan_Error;
         end if;
      end Load_Line;

      function Find_Token
        return Token
      is
         --  Classifies the word referenced by 'Token_Ptr': returns the
         --  token of the reserved word it spells (compared without regard
         --  to letter case), or 'Name_Token' if it is none of the reserved
         --  words the parser is interested in.

         Word : constant String := To_Lower (Token_Ptr.all);
         --  The word folded to lower case once, for all comparisons below.
      begin
         --  Dispatch on the first letter to avoid needless comparisons.
         case Word (Word'First) is
            when 'f' =>
               if Word = "function" then
                  return Function_Token;
               end if;
            when 'g' =>
               if Word = "generic" then
                  return Generic_Token;
               end if;
            when 'i' =>
               if Word = "is" then
                  return Is_Token;
               end if;
            when 'n' =>
               if Word = "new" then
                  return New_Token;
               end if;
            when 'p' =>
               if Word = "package" then
                  return Package_Token;
               elsif Word = "pragma" then
                  return Pragma_Token;
               elsif Word = "private" then
                  return Private_Token;
               elsif Word = "procedure" then
                  return Procedure_Token;
               end if;
            when 'r' =>
               if Word = "return" then
                  return Return_Token;
               end if;
            when 't' =>
               if Word = "type" then
                  return Type_Token;
               end if;
            when 'u' =>
               if Word = "use" then
                  return Use_Token;
               end if;
            when 'w' =>
               if Word = "with" then
                  return With_Token;
               end if;
            when others =>
               null;
         end case;
         --  Not one of the reserved words above: an ordinary name.
         return Name_Token;
      end Find_Token;

      --  Character sets used by 'Advance' to skip numeric literals.

      Numeral          : constant Ada.Strings.Maps.Character_Set :=
        Ada.Strings.Maps.To_Set ("0123456789_");
      --  Decimal digits and the underscore.

      Based_Numeral    : constant Ada.Strings.Maps.Character_Set :=
        Ada.Strings.Maps.To_Set ("0123456789_ABCDEFabcdef");
      --  Hexadecimal digits (both letter cases) and the underscore.

      procedure Advance
      is
         --  Scans the next token, starting at 'Curr_Idx' and loading further
         --  lines as needed. On return 'Curr_Token' holds the kind of the
         --  token, 'Token_Image' / 'Token_Ptr' hold its text if it is a
         --  'Name_Token' or a 'String_Token', and 'Curr_Idx' is the index of
         --  the character following the token.
         --
         --  Raises 'Scan_Error' for an unterminated string literal or a
         --  malformed numeric literal. Exceptions from 'Load_Line' (empty
         --  line, end of file) are propagated.

         function Skip_Set
           (From  : in Natural;
            Chars : in Ada.Strings.Maps.Character_Set)
           return Natural
         is
            --  Returns the index of the first character of the current line
            --  at or after 'From' that is not in 'Chars', or 'Curr'Last + 1
            --  if there is none.
            Idx : Natural := From;
         begin
            while Idx <= Curr'Last and then Is_In (Chars, Curr (Idx)) loop
               Idx := Idx + 1;
            end loop;
            return Idx;
         end Skip_Set;

      begin
         if Curr_Idx > Curr'Last then Load_Line; end if;
         declare
            Ch : Character := Curr (Curr_Idx);
         begin
            --  Skip white space, possibly across line boundaries.
            while Is_Blank (Ch) loop
               Curr_Idx := Curr_Idx + 1;
               if Curr_Idx > Curr'Last then
                  --  'Load_Line' resets 'Curr_Idx' to the start of the line.
                  Load_Line;
               end if;
               Ch := Curr (Curr_Idx);
            end loop;
            --  Each branch leaves 'Curr_Idx' on the last character of the
            --  token; it is stepped past the token after the case statement.
            case Ch is
               when '(' =>
                  Curr_Token := Left_Paren_Token;

               when ')' =>
                  Curr_Token := Right_Paren_Token;

               when ';' =>
                  Curr_Token := Semicolon_Token;

               when '.' =>
                  Curr_Token := Period_Token;

               when 'A' .. 'Z' | 'a' .. 'z' =>
                  --  Parse a name: any sequence of characters, digits, and
                  --  underscores.
                  declare
                     Stop_Idx : constant Natural :=
                       Identifier (Curr (Curr_Idx .. Curr'Last));
                     --  Used below as the index of the last character of
                     --  the name.
                  begin
                     UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx));
                     Token_Ptr := UT.Internal.Get_Ptr (Token_Image);
                     Curr_Idx := Stop_Idx;
                  end;
                  Curr_Token := Find_Token;

               when ''' =>
                  --  If there is another tick two characters further on,
                  --  assume a character literal and skip it as a whole;
                  --  otherwise the tick stands alone (attribute or
                  --  qualified expression).
                  if Curr_Idx + 2 <= Curr'Last and then
                     Curr (Curr_Idx + 2) = '''
                  then
                     Curr_Idx := Curr_Idx + 2;
                  end if;
                  Curr_Token := Other_Token;

               when '"' =>
                  --  Skip a string.
                  declare
                     Stop_Idx : constant Natural :=
                       Ada_Skip_String (Curr (Curr_Idx .. Curr'Last), '"');
                  begin
                     if Stop_Idx = 0 then
                        --  No closing quote on this line.
                        raise Scan_Error;
                     end if;
                     UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx));
                     Token_Ptr := UT.Internal.Get_Ptr (Token_Image);
                     Curr_Idx := Stop_Idx;
                     Curr_Token := String_Token;
                  end;

               when '0' .. '9' =>
                  --  Skip a number. Note: use a simplified syntax!
                  --  'Stop_Idx' always is the index of the first character
                  --  not yet recognized as part of the number.
                  declare
                     Stop_Idx : Natural := Skip_Set (Curr_Idx, Numeral);
                  begin
                     if Stop_Idx <= Curr'Last then
                        if Curr (Stop_Idx) = '#' then
                           --  Based literal: base # digits [. digits] #
                           --  Actually, there must be at least one digit.
                           Stop_Idx := Skip_Set (Stop_Idx + 1, Based_Numeral);
                           if Stop_Idx <= Curr'Last and then
                              Curr (Stop_Idx) = '.'
                           then
                              --  Based real literal, e.g. 16#F.F#: allow
                              --  (at most) one point between the '#'s.
                              Stop_Idx :=
                                Skip_Set (Stop_Idx + 1, Based_Numeral);
                           end if;
                           if Stop_Idx <= Curr'Last and then
                              Curr (Stop_Idx) = '#'
                           then
                              Stop_Idx := Stop_Idx + 1;
                           else
                              raise Scan_Error;
                           end if;
                        elsif Curr (Stop_Idx) = '.' and then
                              Stop_Idx < Curr'Last and then
                              Curr (Stop_Idx + 1) in '0' .. '9'
                        then
                           --  Fraction. A point that is not followed by a
                           --  digit does not belong to the number (it may
                           --  be the start of ".." as in "1..10").
                           Stop_Idx := Skip_Set (Stop_Idx + 1, Numeral);
                        end if; --  Fraction or Based
                     end if;
                     if Stop_Idx <= Curr'Last and then
                        (Curr (Stop_Idx) = 'E' or else Curr (Stop_Idx) = 'e')
                     then
                        --  Exponent; the 'E' may be in either letter case.
                        Stop_Idx := Stop_Idx + 1;
                        if Stop_Idx > Curr'Last then raise Scan_Error; end if;
                        case Curr (Stop_Idx) is
                           when '0' .. '9' =>
                              null;
                           when '+' | '-' =>
                              Stop_Idx := Stop_Idx + 1;
                              if Stop_Idx > Curr'Last then
                                 raise Scan_Error;
                              end if;
                           when others =>
                              raise Scan_Error;
                        end case;
                        --  Actually, there must be at least one digit now.
                        Stop_Idx := Skip_Set (Stop_Idx, Numeral);
                     end if; --  Exponent
                     Curr_Idx := Stop_Idx - 1;
                  end;
                  Curr_Token := Other_Token;

               when others =>
                  Curr_Token := Other_Token;

            end case;
            --  Step past the last character of the token.
            Curr_Idx := Curr_Idx + 1;
         end;
      end Advance;

      function Current_Token return Token is
         --  Kind of the token most recently scanned by 'Advance'.
      begin
         return Curr_Token;
      end Current_Token;

      function Image
        return UT.Unbounded_String
      is
         --  Text of the current token. Only names and string literals carry
         --  a text; for every other token the result is the null string.
      begin
         case Curr_Token is
            when Name_Token | String_Token =>
               return Token_Image;

            when others =>
               return UT.Null_Unbounded_String;

         end case;
      end Image;

      procedure Init
        (File_Name : in String)
      is
         --  Starts scanning 'File_Name': opens the file for reading, loads
         --  its first line and scans the first token. Exceptions from
         --  opening the file or from scanning are propagated; the caller is
         --  expected to call 'Close' in any case.
      begin
         Ada.Text_IO.Open
           (File => F,
            Mode => Ada.Text_IO.In_File,
            Name => File_Name);
         Load_Line;
         Advance;
      end Init;

      procedure Close
      is
         --  Ends scanning: closes the file if it is open. Calling 'Close'
         --  when no file is open is harmless.
      begin
         if not Ada.Text_IO.Is_Open (F) then
            return;
         end if;
         Ada.Text_IO.Close (File => F);
      end Close;

   end Scanner;

   ----------------------------------------------------------------------------
   --  Parsing routines. This is a very simple recursive descent parser, yet
   --  it recognizes syntactically correct Ada 95 library unit headers up
   --  to the library unit name. It doesn't do any error recovery, and it
   --  skips source chunks that are not interesting. The sole purpose of this
   --  is to get the name of the library unit, not any syntax or semantics
   --  checking.

   package Parser is

      --  Interface of the parser. It works on the token stream of 'Scanner',
      --  which must have been initialized before 'Library_Unit' is called.

      function Library_Unit
        return String;
      --  Parses context clauses, an optional "private", and an optional
      --  generic formal part, and returns the (possibly expanded) name that
      --  follows "package", "procedure" or "function". Returns the empty
      --  string if none of these three reserved words is found there.

      Parse_Error : exception;
      --  Raised when the token stream does not have the expected form.

   end Parser;

   package body Parser is

      use Scanner;

      procedure Skip_Parentheses
      is
         --  Skips a parenthesized token sequence including any nested
         --  parentheses. Intended to be called with the current token being
         --  the opening parenthesis; on return the current token is the one
         --  following the matching closing parenthesis.

         Depth : Natural := 0;
         --  Number of parentheses currently open.
      begin
         loop
            if Current_Token = Left_Paren_Token then
               Depth := Depth + 1;
            elsif Current_Token = Right_Paren_Token then
               Depth := Depth - 1;
            end if;
            --  Consume the token just looked at, then stop as soon as all
            --  parentheses are closed again.
            Advance;
            if Depth = 0 then
               return;
            end if;
         end loop;
      end Skip_Parentheses;

      procedure Skip_To_Semicolon
      is
         --  Advances until the current token is a semicolon. The semicolon
         --  itself is not consumed. Does nothing if the current token
         --  already is a semicolon.
      begin
         loop
            exit when Current_Token = Semicolon_Token;
            Advance;
         end loop;
      end Skip_To_Semicolon;

      procedure Skip_To_Semicolon_Nested
      is
         --  Advances to the first semicolon that is not enclosed in
         --  parentheses. Parenthesized parts are skipped as a whole; the
         --  semicolon itself is not consumed.
      begin
         loop
            case Current_Token is
               when Semicolon_Token =>
                  return;

               when Left_Paren_Token =>
                  Skip_Parentheses;

               when others =>
                  Advance;

            end case;
         end loop;
      end Skip_To_Semicolon_Nested;

      procedure Context_Clauses
      is
         --  Skips all leading with-clauses, use-clauses and pragmas. On
         --  return the current token is the first token that does not start
         --  one of these.
      begin
         loop
            if Current_Token = Pragma_Token then
               --  Pragma arguments are parenthesized; skip them as a whole.
               Skip_To_Semicolon_Nested;
            elsif Current_Token = With_Token or else
                  Current_Token = Use_Token
            then
               Skip_To_Semicolon;
            else
               return;
            end if;
            --  Skip the semicolon.
            Advance;
         end loop;
      end Context_Clauses;

      procedure Generic_Formals
      is
         --  Skips a generic formal part. To be called with the current token
         --  being the one following "generic". Returns when the current
         --  token is "package", "procedure" or "function", i.e. at the
         --  start of the generic unit itself. Raises 'Parse_Error' if the
         --  formal part does not have the expected form.

         procedure Expect
           (Expected : in Token)
         is
            --  Raises 'Parse_Error' unless the current token is 'Expected'.
            --  Does not consume the token.
         begin
            if Current_Token /= Expected then
               raise Parse_Error;
            end if;
         end Expect;

         procedure Formal_Type
         is
            --  Generic formal type: "type" name [discriminants] "is" ...
            --  Stops at the terminating semicolon.
         begin
            Advance;
            Expect (Name_Token);
            Advance;
            if Current_Token = Left_Paren_Token then
               --  Discriminants.
               Skip_Parentheses;
            end if;
            Expect (Is_Token);
            Skip_To_Semicolon;
         end Formal_Type;

         procedure Formal_Package
         is
            --  Generic formal package: "package" name "is" "new" name
            --  {"." name} [actual part]. The preceding "with" has already
            --  been consumed. Stops at the terminating semicolon.
         begin
            Advance;
            Expect (Name_Token);
            Advance;
            Expect (Is_Token);
            Advance;
            Expect (New_Token);
            Advance;
            Expect (Name_Token);
            Advance;
            --  It may be an expanded name (Package.Name).
            while Current_Token = Period_Token loop
               Advance;
               Expect (Name_Token);
               Advance;
            end loop;
            if Current_Token = Left_Paren_Token then
               --  Generic actual part.
               Skip_Parentheses;
            end if;
            Skip_To_Semicolon;
         end Formal_Package;

         procedure Formal_Subprogram
         is
            --  Generic formal subprogram. The preceding "with" has already
            --  been consumed; the current token is "procedure" or
            --  "function". Stops at the terminating semicolon.

            Is_Function : constant Boolean :=
              Current_Token = Function_Token;
         begin
            Advance;
            --  A name is required; only a function may instead be
            --  designated by an operator symbol (a string).
            if Current_Token /= Name_Token and then
               not (Is_Function and then Current_Token = String_Token)
            then
               raise Parse_Error;
            end if;
            Advance;
            if Current_Token = Left_Paren_Token then
               --  Parameter specifications.
               Skip_Parentheses;
            end if;
            if Is_Function then
               --  Return type
               Expect (Return_Token);
               Advance;
               Expect (Name_Token);
               Advance;
            end if;
            Skip_To_Semicolon;
         end Formal_Subprogram;

      begin
         loop
            case Current_Token is
               when Pragma_Token | Name_Token =>
                  --  Just to be on the safe side: allow pragmas in the
                  --  generic formal part. A name starts a generic formal
                  --  object. In both cases, skip to the first semicolon not
                  --  within parentheses.
                  Skip_To_Semicolon_Nested;

               when Use_Token =>
                  Skip_To_Semicolon;

               when Type_Token =>
                  Formal_Type;

               when With_Token =>
                  --  Generic formal subprogram or formal package.
                  Advance;
                  case Current_Token is
                     when Package_Token =>
                        Formal_Package;

                     when Procedure_Token | Function_Token =>
                        Formal_Subprogram;

                     when others =>
                        raise Parse_Error;

                  end case;

               when Package_Token | Procedure_Token | Function_Token =>
                  --  Start of the generic unit: the formal part is over.
                  exit;

               when others =>
                  raise Parse_Error;

            end case;
            --  Every formal must end with a semicolon; skip it.
            Expect (Semicolon_Token);
            Advance;
         end loop;
      end Generic_Formals;

      function Library_Unit
        return String
      is
         --  Parses the scanned source up to and including the name of the
         --  library unit and returns that name; the components of an
         --  expanded name are joined by periods. Returns "" if the token
         --  after the context clauses, an optional "private" and an
         --  optional generic formal part is none of "package", "procedure"
         --  or "function". Raises 'Parse_Error' if no valid name follows.
         --
         --  NOTE(review): "body" is not a token of its own, so for a
         --  "package body X" the word "body" is scanned as a name and
         --  returned; confirm that callers only pass specifications.

         Unit_Kind : Token;
         --  The reserved word introducing the unit.

         function At_Designator
           return Boolean
         is
            --  True if the current token can be a component of the unit's
            --  name: a name, or for functions also a string (operator).
         begin
            return Current_Token = Name_Token or else
                   (Unit_Kind = Function_Token and then
                    Current_Token = String_Token);
         end At_Designator;

      begin
         Context_Clauses;
         if Current_Token = Private_Token then
            Advance;
         end if;
         if Current_Token = Generic_Token then
            Advance;
            Generic_Formals;
         end if;
         Unit_Kind := Current_Token;
         if Unit_Kind /= Package_Token and then
            Unit_Kind /= Procedure_Token and then
            Unit_Kind /= Function_Token
         then
            --  Not a unit we know how to handle.
            return "";
         end if;
         --  Next one must be the unit name.
         Advance;
         if not At_Designator then
            raise Parse_Error;
         end if;
         declare
            Unit_Name : UT.Unbounded_String;
            Previous  : Token := Current_Token;
            --  Kind of the name component scanned last.
         begin
            Unit_Name := Image;
            Advance;
            --  Collect the remaining components of an expanded name.
            while Current_Token = Period_Token loop
               Advance;
               --  Only a name may be followed by a period.
               if Previous /= Name_Token then
                  raise Parse_Error;
               end if;
               if not At_Designator then
                  raise Parse_Error;
               end if;
               UT.Append (Unit_Name, '.');
               UT.Append (Unit_Name, Image);
               Previous := Current_Token;
               Advance;
            end loop;
            return UT.To_String (Unit_Name);
         end;
      end Library_Unit;

   end Parser;

   ----------------------------------------------------------------------------
   --  Exported routines.

   function Get_Unit_Name
     (File_Name : in String)
     return String
   is
      --  Returns the name of the library unit found in the source file
      --  'File_Name', or the empty string if the file cannot be opened,
      --  scanned or parsed. Never propagates an exception from scanning or
      --  parsing; the file is closed again in all cases.
   begin
      Scanner.Init (File_Name);
      declare
         Result : constant String := Parser.Library_Unit;
      begin
         Scanner.Close;
         return Result;
      end;
   exception
      when others =>
         --  Whatever went wrong: release the file and report "no name".
         Scanner.Close;
         return "";
   end Get_Unit_Name;

end AD.Parse;