File: ansidecl.d

package info (click to toggle)
clisp 1%3A2.27-0.5
links: PTS
area: main
in suites: woody
size: 49,860 kB
ctags: 20,752
sloc: ansic: 123,781; lisp: 67,533; asm: 19,633; xml: 11,766; sh: 9,788; fortran: 8,307; makefile: 3,570; objc: 2,481; perl: 1,744; java: 341; yacc: 318; sed: 117
file content (538 lines) | stat: -rw-r--r-- 20,311 bytes
# Programm zum Umwandeln von Funktionsdeklarationen vom traditionellen Stil
# in die ANSI-Syntax bei nicht allzu übel geformten C-Programmen
# Bruno Haible 16.2.1993

# Ziel:
# 1. Token &! streichen.
# 2. Deklarationen
#           funname(vname1,...,vnamek)
#             sspec1 tspec1 vname1 ;
#             ...;
#             sspeck tspeck vnamek ;
#             {
#    umwandeln in
#           funname ( sspec1 tspec1 vname1 , ..., sspeck tspeck vnamek ) {
#    unter Beibehaltung aller Kommentare, Präprozessor-Kommandos usw.

# Methode:
# Mit Kenntnis der Begriffe "Präprozessor-Kommando", "Kommentar", "Token"
# wird nach den öffnenden geschweiften Klammern '{' des äußersten
# Schachtelungslevels gesucht, denen ein Strichpunkt ';' oder
# eine Klammer zu ')' unmittelbar vorangeht.
# Von dort aus wird (unter Mitzählen der Strichpunkte im äußersten
# Schachtelungslevel) die vorige Klammer zu ')' des äußersten Schachtelungs-
# levels gesucht. Die Strichpunkte werden in Kommata bzw. eine Klammer zu
# umgewandelt. Bis zur vorigen Klammer auf '(' des äußersten Schachtelungs-
# levels wird alles durch Leerstellen ersetzt.


#define MAXARGCOUNT  50  # maximale Anzahl Argumente einer Funktionsdefinition
#define MAXHEADERLEN  5000  # maximale Länge eines Funktionsdefinitions-Kopfes

#define local static
#define global
#define var
#define loop  while (1)
#define until(exp)  while (!(exp))
typedef unsigned char  uintB;
typedef unsigned short  uintW;
typedef unsigned long  uintL;
typedef int  boolean;
#define FALSE 0
#define TRUE 1

#include <stdlib.h>

#include <stdio.h>

#ifndef NULL
#define NULL ((void*)0)
#endif

#if !(defined(__GNUC__) && !defined(__STRICT_ANSI__))
#define inline
#endif

local FILE* infile;
local FILE* outfile;

# Input
# =====

local uintL input_line;

local int in_char (void)
  { var int c = getc(infile);
    if (c=='\n') { input_line++; }
    return c;
  }

local int peek_char (void)
  { var int c = getc(infile);
    if (!(c==EOF)) { ungetc(c,infile); }
    return c;
  }

local uintL last_good_input_line;

# Output
# ======

# Output kann immer ein wenig gepuffert werden:
enum out_mode { direct, buffered };
local struct { enum out_mode mode; # Output-Modus
               uintB buffer[MAXHEADERLEN]; # Buffer
               uintL buffindex; # Index in den Buffer
             }
      out;

local inline void char_out (uintB ch)
  { putc(ch,outfile); }

# Output-Bufferung ausschalten:
local void outbuffer_off (void)
  { if (out.mode==buffered)
      { var uintL index = 0;
        while (index < out.buffindex)
          { char_out(out.buffer[index]); index++; }
        out.mode = direct;
  }   }

# Output-Bufferung ausschalten und dabei an einer Stelle einen String einfügen:
local void outbuffer_off_insert (uintL insertpoint, char* insert)
  { if (out.mode==buffered)
      { var uintL index = 0;
        loop
          { if (index==insertpoint)
              { while (!(*insert==0)) { char_out(*insert++); } }
            if (index == out.buffindex) break;
            char_out(out.buffer[index]); index++;
          }
        out.mode = direct;
  }   }

# Output-Bufferung einschalten:
local void outbuffer_on (void)
  { if (out.mode==direct)
      { out.buffindex = 0;
        out.mode = buffered;
  }   }

# Character ausgeben:
local void out_char (int c)
  { if (out.mode==buffered)
      { if (out.buffindex < MAXHEADERLEN)
          { out.buffer[out.buffindex++] = c; }
          else
          # Buffer voll -> Buffer abschalten
          { outbuffer_off(); char_out(c); }
      }
      else
      { char_out(c); }
  }

# lexikalische Analyse
# ====================

# Holt das nächste Character:
local int next_char (void)
  { var int c = in_char();
    if (!(c==EOF))
      { out_char(c); } # c auch ausgeben
    return c;
  }

# Für unsere Zwecke brauchen ++ -> != usw. nicht als eigene Token betrachtet
# zu werden, wir kennen also nur:
#   EOF
#   Identifier
#   Zahl-Konstanten
#   Character-Konstanten
#   String-Konstanten
#   Operator/Separator
# Verallgemeinerte Tokens: Expressions (mit balancierten Klammern)
enum token_type { eof, eol, ident, number, charconst, stringconst, sep, expr };
typedef struct { enum token_type type;
                 # falls Bufferung aktiv:
                 uintL startindex; # Startindex im Buffer
                 uintL endindex; # Endindex im Buffer
                 # bei sep (Operator/Separator):
                 uintB ch;
               }
        token_;
typedef token_* Token;

# globale Token-Tabelle (Inhalt nur während einer Bufferungsperiode gültig):
#define MAXTOKENS  20000
local struct { uintL index;
               token_ data[MAXTOKENS];
             }
      tokens;

# Holt das nächste Token:
# (Innerhalb von Präprozessor-Direktiven zählt Zeilenende als eigenes Token,
# und '#' leitet keine verschachtelte Präprozessor-Direktive ein.)
local Token nexttoken (boolean within_prep_directive)
  { if (tokens.index == MAXTOKENS)
      # kein Platz mehr in der Token-Tabelle -> nicht mehr buffern
      { outbuffer_off(); tokens.index = 0;
        fprintf(stderr,"Token-Tabelle übergelaufen in Zeile %lu.\nFehler irgendwo nach Zeile %lu.\n",input_line,last_good_input_line);
        exit(1);
      }
    # Nun ist tokens.index < MAXTOKENS .
   {var Token token = &tokens.data[tokens.index]; # Platz fürs nächste Token
    tokens.index++;
    restart:
    token->startindex = out.buffindex;
    if (peek_char() == '&')
      { in_char(); # '&' überlesen
        if (peek_char() == '!')
          # '&!'
          { in_char(); goto restart; } # '!' überlesen
          else
          { out_char('&'); token->type = sep; token->ch = '&'; goto fertig; }
      }
    { var int c = next_char();
      switch (c)
        { case EOF:
            # EOF
            token->type = eof; goto fertig;
          case ' ': case '\v': case '\t':
            # Whitespace. überlesen
            goto restart;
          case '\n':
            # Zeilenende
            if (within_prep_directive)
              { token->type = eol; goto fertig; } # als Token zurück
              else
              { goto restart; } # überlesen
          case '\\':
            if (peek_char()=='\n')
              # Zeilenende nach '\'. überlesen
              { next_char(); goto restart; }
              else
              goto separator;
          case '/':
            if (peek_char() == '*')
              # Kommentar
              { next_char();
                loop { c = next_char();
                       if (c==EOF) { fprintf(stderr,"Unbeendeter Kommentar\n"); break; }
                       if ((c=='*') && (peek_char()=='/')) { next_char(); break; }
                     }
                goto restart;
              }
              else
              goto separator;
          case '*':
            if (peek_char() == '/')
              # illegales Kommentar-Ende
              { fprintf(stderr,"Kommentar-Ende außerhalb Kommentar in Zeile %lu\n",input_line); }
            goto separator;
          case '#':
            if (within_prep_directive)
              { goto separator; }
              else
              { # Präprozessor-Anweisung.
                # Bis Zeilenende oder EOF lesen.
                loop
                  { var Token subtoken = nexttoken(TRUE);
                    if ((subtoken->type == eof) || (subtoken->type == eol))
                      break;
                  }
                goto restart; # und überlesen
              }
          case '.':
            c = peek_char();
            if (!(((c>='0') && (c<='9')) || (c=='.'))) goto separator;
          case '0': case '1': case '2': case '3': case '4':
          case '5': case '6': case '7': case '8': case '9':
            # Zahl. Weiterlesen, solange alphanumerisches Zeichen oder '.':
            loop
              { c = peek_char();
                if (((c>='0') && (c<='9'))
                    || ((c>='A') && (c<='Z')) || ((c>='a') && (c<='z'))
                    || (c=='.')
                   )
                  { next_char(); }
                  else
                  break;
              }
            token->type = number; goto fertig;
          case '\'':
            # Character-Konstante
            loop
              { c = next_char();
                if (c==EOF) { fprintf(stderr,"Unbeendete Character-Konstante\n"); break; }
                if (c=='\'') break;
                if (c=='\\') { c = next_char(); }
              }
            token->type = charconst; goto fertig;
          case '\"':
            # String-Konstante
            loop
              {
#ifndef QUOTE_QUOTES
                c = next_char();
                if (c==EOF) { fprintf(stderr,"Unbeendete String-Konstante\n"); break; }
#else # muss Single-Quotes in Strings quotieren:
                c = in_char();
                if (c==EOF) { fprintf(stderr,"Unbeendete String-Konstante\n"); break; }
                if (c=='\'')
                  { # statt "'" ein "\047" ausgeben:
                    out_char('\\');
                    out_char('0'+((((unsigned char)'\'')/64)%8));
                    out_char('0'+((((unsigned char)'\'')/8)%8));
                    out_char('0'+(((unsigned char)'\'')%8));
                    continue;
                  }
                out_char(c);
#endif
                if (c=='\"') break;
                if (c=='\\') { c = next_char(); }
              }
            token->type = stringconst; goto fertig;
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
          case 'Y': case 'Z':
          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':
          case 'y': case 'z':
          case '_':
            # Identifier. alles alphanumerische überlesen.
            loop
              { c = peek_char();
                if (   ((c>='0') && (c<='9'))
                    || ((c>='A') && (c<='Z')) || ((c>='a') && (c<='z'))
                    || (c=='_')
                   )
                  { next_char(); }
                  else
                  break;
              }
            token->type = ident; goto fertig;
          default:
          separator:
            token->type = sep; token->ch = c; goto fertig;
    }   }
    fertig:
    token->endindex = out.buffindex;
    return token;
  }}
local inline Token next_token (void)
  { return nexttoken(FALSE); }

# Klammern mitzählen:
#define MAXBRACES 1000 # maximale Verschachtelungstiefe von Klammern
local struct { uintL count;
               struct { uintB brace_type; uintL input_line; } opening[MAXBRACES];
             }
      open_braces;

# Mitzählen einer öffnenden Klammer:
local void handle_opening_token (Token token)
  { if (open_braces.count < MAXBRACES)
      { open_braces.opening[open_braces.count].brace_type = token->ch;
        open_braces.opening[open_braces.count].input_line = input_line;
      }
    open_braces.count++;
  }

# Mitzählen einer schließenden Klammer (ohne Überprüfung der Verschachtelung):
local inline void handle_closing_token (Token token)
  { open_braces.count--; }

# nächste Expression mit balancierten Klammern '()', '{}', '[]' lesen:
# (Dabei ist auf das Niveau open_braces.count=0 zu kommen,
#  evtl. ist jetzt schon open_braces.count>0.)
local Token next_balanced_token (Token start_token)
  { var uintL open_braces_start = 0; # Ziel-Niveau: 0
    var Token token = (start_token==NULL ? next_token() : start_token);
    var uintL startindex = token->startindex;
    loop
      { # Hier stets  open_braces.count >= open_braces_start .
        switch (token->type)
          { case eof:
              if (open_braces.count > open_braces_start)
                { if (open_braces.count <= MAXBRACES)
                    fprintf(stderr,"Nicht geschlossene '%c' in Zeile %lu\n",
                                   open_braces.opening[open_braces.count-1].brace_type,
                                   open_braces.opening[open_braces.count-1].input_line
                           );
                    else
                    fprintf(stderr,"Nicht geschlossene '(' oder '{' oder '['\n");
                }
              return token; # EOF-Token als Ergebnis
            case sep:
              switch (token->ch)
                { case '(': case '{': case '[':
                    handle_opening_token(token);
                    break;
                  case ')': case '}': case ']':
                    if (open_braces.count > open_braces_start)
                      { open_braces.count--;
                        if (open_braces.count < MAXBRACES)
                          { var uintB opening_ch = open_braces.opening[open_braces.count].brace_type;
                            var uintB closing_ch = token->ch;
                            if (!(   ((opening_ch == '(') && (closing_ch == ')'))
                                  || ((opening_ch == '{') && (closing_ch == '}'))
                                  || ((opening_ch == '[') && (closing_ch == ']'))
                               ) )
                              { fprintf(stderr,"Öffnende Klammer '%c' in Zeile %lu\n und schließende Klammer '%c'\n in Zeile %lu passen nicht zusammen.\n",
                                        opening_ch,open_braces.opening[open_braces.count].input_line,
                                        closing_ch,input_line
                                       );
                          }   }
                      }
                      else
                      { fprintf(stderr,"Nicht geöffnete '%c' in Zeile %lu\n",
                                token->ch,input_line
                               );
                        goto fertig;
                      }
                    break;
                  default:
                    break;
                }
            default: ;
              # alles andere ist ausbalanciert
          }
        if (open_braces.count == open_braces_start) break; # fertig ausbalanciert?
        token = next_token(); # nein -> nächstes Token lesen
      }
    fertig:
    token->startindex = startindex;
    return token;
  }

# Umwandlung der Funktionsdefinitions-Deklarationen auf dem ganzen File
# vornehmen:
local void convert (void)
  { input_line = 1; last_good_input_line = 1;
    out.mode = direct;
    open_braces.count = 0;
    restart1: # Hier out.mode=direct, neues Token lesen:
    tokens.index = 0; # Token-Tabelle leeren
   {var Token token = next_token(); # nächstes Token lesen
    restart2: # Hier out.mode=direct, neues Token gelesen.
    if ((token->type == sep) && (token->ch == '(')) # Klammer auf?
      # ja -> aufpassen:
      { handle_opening_token(token);
        outbuffer_on(); # Buffer einschalten
        # Es sollten nun k Identifier, getrennt durch k-1 Kommata, kommen:
        token = next_token();
        if ((token->type == sep) && (token->ch == ')')) # Klammer zu?
          # ja -> in eine leere Parameterliste evtl. 'void' einfügen:
          { var uintL insertpoint = token->startindex;
            handle_closing_token(token);
            token = next_token();
            if (!((token->type == sep) && (token->ch == '{'))) # geschweifte Klammer auf?
              { outbuffer_off(); goto restart2; }
            # token = '{'
            # Umwandeln: "void" einfügen.
            outbuffer_off_insert(insertpoint,"void");
            token = next_balanced_token(token); # Funktionsdefinition überlesen
          }
          else
          # nein -> nichtleere Parameterliste verarbeiten:
          { var Token param_names[MAXARGCOUNT];
            var Token param_comma[MAXARGCOUNT];
            var uintL param_count = 0;
            loop
              { if (param_count==MAXARGCOUNT) goto nix; # kein Platz mehr?
                if (!(token->type == ident)) # Identifier?
                  goto nix;
                param_names[param_count] = token; # ja -> abspeichern
                token = next_token();
                if (!(   ((token->type == sep) && (token->ch == ')')) # Klammer zu?
                      || ((token->type == sep) && (token->ch == ',')) # oder Komma?
                   ) )
                  goto nix;
                param_comma[param_count] = token; # ja -> abspeichern
                param_count++;
                if ((token->type == sep) && (token->ch == ')')) # Klammer zu?
                  break;
                token = next_token();
              }
            # token = ')'
            handle_closing_token(token);
            # Parameter-Deklarationen verarbeiten:
           {var Token paramdecl_semicolons[MAXARGCOUNT];
            var uintL paramdecl_count = 0;
            loop
              { token = next_token();
                if ((token->type == sep) && (token->ch == '(')) # Klammer auf?
                  # Entscheidung der Zweideutigkeit (Beispiel:
                  # " macro(macroarg) (x) int x; {los();} "
                  # -> zugunsten der Erkennung von "macro(macroarg)" und nicht
                  # "macro":
                  { outbuffer_off(); goto restart2; }
                if ((token->type == sep) && (token->ch == '{')) # geschweifte Klammer auf?
                  break;
                loop
                  { token = next_balanced_token(token);
                    if (token->type==eof) goto nix;
                    if ((token->type == sep) && (token->ch == ';')) # Semikolon?
                      break;
                    # Entscheidung der Zweideutigkeit (Beispiel:
                    # " foo(a,b) int a; bar(x,y) int x; int y; {los();} "
                    # -> zugunsten der Erkennung von "bar" und nicht "foo"):
                    token = next_token();
                    if ((token->type == sep) && (token->ch == '(')) # Klammer auf?
                      { outbuffer_off(); goto restart2; }
                  }
                if (paramdecl_count==MAXARGCOUNT) goto nix; # kein Platz mehr?
                paramdecl_semicolons[paramdecl_count] = token;
                paramdecl_count++;
              }
            # token = '{'
            if ((param_count == paramdecl_count) # gleichviele Variablen wie Deklarationen?
                && (out.mode==buffered) # und Buffer noch da?
               )
              # ja -> Modifikation durchführen:
              # Alle param_names und param_comma durch Leerstellen und
              # alle paramdecl_semicolons durch Komma bzw. Klammer zu
              # überschreiben:
              { do { param_count--;
                    {var Token name = param_names[param_count];
                     var uintL index = name->startindex;
                     var uintL endindex = name->endindex;
                     until (index==endindex) { out.buffer[index++] = ' '; }
                    }
                    {var Token comma = param_comma[param_count];
                     var uintL index = comma->startindex;
                     var uintL endindex = comma->endindex;
                     until (index==endindex) { out.buffer[index++] = ' '; }
                   }}
                   until (param_count==0);
                out.buffer[paramdecl_semicolons[--paramdecl_count]->startindex] = ')';
                until (paramdecl_count==0)
                  { out.buffer[paramdecl_semicolons[--paramdecl_count]->startindex] = ','; }
              }
            outbuffer_off();
            last_good_input_line = input_line;
            token = next_balanced_token(token); # Funktionsdefinition überlesen
          }}
      }
      else
      { nix: # keine umwandelbare Funktionsdefinition
        outbuffer_off();
        token = next_balanced_token(token);
      }
    # Hier ist wieder out.mode=direct.
    if (!(token->type==eof)) goto restart1;
    # Bei EOF am Input: fertig (da outfile ungebuffert).
  }}

int main ()
  { infile = stdin;
    outfile = stdout;
    convert();
    if (ferror(stdin) || ferror(stdout)) { exit(1); }
    exit(0);
  }