File: tokenizer.pl.in

package info (click to toggle)
system-tools-backends 1.4.2-3
links: PTS
area: main
in suites: etch, etch-m68k
size: 4,740 kB
ctags: 252
sloc: perl: 28,142; sh: 9,064; makefile: 82
file content (368 lines) | stat: -rwxr-xr-x 8,721 bytes
parent folder | download | duplicates (2)
#!/usr/bin/env perl
#-*- Mode: perl; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-


###############################################

#	Please : do not use this functions yet as they need renaming and the order of the
#	parameters are going to chamge. Chema.

###############################################


# Functions for tokenizing files
#
# Copyright (C) 2001 Ximian, Inc.
#
# Authors: Chema Celorio <chema@ximian.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as published
# by the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

$SCRIPTSDIR = "@scriptsdir@";
if ($SCRIPTSDIR =~ /^@scriptsdir[@]/)
{
    $SCRIPTSDIR = ".";
    $DOTIN = ".in";
}

require "$SCRIPTSDIR/file.pl$DOTIN";

# -------------------------------- Test ---------------------------
sub gst_tokenizer_test
{
    my ($file) = @_;
    my $in, $out;
    my $token;

    $out = \@dummy_arrayt; # Otherwise push @$out doesn't work
    $in = &gst_tokenize ($file);

    while (defined ($token = &gst_tokenize_get_token ($in, $out))) {
        # Replace a token
        if ("subnet" eq $token) {
            my $subnet = &gst_tokenize_get_token ($in, $out);
            &gst_tokenize_replace_token (out, "123\.123\.123\.123");
        }
        # Append tokens
        if ("option" eq $token) {
            &gst_tokenize_append_token ($out, " now");
        }
        # Remove and skip till
        if ("range" eq $token) {
            &gst_tokenize_remove_till  ($out,";");
            &gst_tokenize_skip_till    ($in, ";");
        }
    }

    &gst_file_buffer_save ($out, $file . ".new");

    printf STDERR "Tested tokenizer with $file. Output in $file.new\n";
    printf STDERR "You can :\n";
    printf STDERR "diff $file $file.new\n";
}
#&gst_tokenizer_test ("/etc/dhcpd.conf");

# -------------------------------- token functions -------------------
sub gst_tokenize_warning
{
    my ($mess) = @_;

    printf STDERR "Warning ***: $mess\n";
}

sub gst_tokenize_error
{
    my ($mess) = @_;

    printf STDERR "Error ***: $mess\n";

    exit;
}

sub gst_tokenize_verify_token
{
    my ($in, $verifyme, $out) = @_;
    my $token = &gst_tokenize_get_token ($in, $out);

    if ($token ne $verifyme) {
        &gst_tokenize_warning ("Expected \"" . $verifyme . "\" found \"" . $token . "\" at line ?");
    }
}

sub gst_tokenize_get_token
{
    my ($in, $out) = @_;
    my $token = "";

    while ($token eq "") {
        if (not ($token = shift @$in)) {
            return undef;
        }
        if (defined $out) {
            push (@$out, $token);
        }
        $token =~ s/\s//g;
        $token =~ s/\#.*//;
    }

    return $token;
}

sub gst_tokenize_get_token_unclean
{
    my ($in, $out) = @_;
    my $token = "";

    if (not ($token = shift @$in)) {
        return undef;
    }

    if (defined $out) {
        push (@$out, $token);
    }

    return $token;
}

sub gst_tokenize_get_token_till
{
    my ($in, $out, $till_me) = @_;
    my $resp;

    while (defined ($token = &gst_tokenize_get_token ($in, $out)) and
           ($token ne $till_me)){
        if (defined $resp) {
            $resp .= " ";
        }
        $resp .= $token;
    }

    return $resp;
}

sub gst_tokenize_replace_token
{
    my ($out, $new_token) = @_;
    my $token = pop (@$out);
    my $clean = $token;

    $clean =~ s/\s//g;
    $clean =~ s/\#.*//;
    
    $token =~ s/$clean/$new_token/;

    push (@$out, $token);
}

sub gst_tokenize_append_token
{
    my ($out, $token) = @_;

    # Fixme, split tokens while adding them so that
    # we maintain consistency
    push (@$out, $token);
}

sub gst_tokenize_remove_token
{
    my ($out, $number) = @_;

    if (not defined ($number)) {
        $number = 1;
    }

    while ($number gt 0) {
        pop (@$out);
        $number --;
    }
}

sub gst_tokenize_skip_token
{
    my ($in, $number) = @_;

    if (not defined ($number)) {
        $number = 1;
    }

    while ($number gt 0) {
        shift (@$in);
        $number --;
    }
}

sub gst_tokenize_undo
{
    my ($in, $out, $number) = @_;

    if (not defined ($number)) {
        $number = 1;
    }

    while ($number gt 0) {
        @$in = (my $token = pop @$out, @$in);
        $token =~ s/\s//g;
        $token =~ s/\#.*//;
        if ($token ne "") {
            $number --;
        }
    }
}

sub gst_tokenize_advance_till
{
    my ($in, $till_me, $out) = @_;
    
    while (defined (my $token = &gst_tokenize_get_token ($in, $out))) {
        if ($token eq $till_me) {
            return;
        }
    }
}

sub gst_tokenize_skip_till
{
    my ($in, $till_me) = @_;
    my $token;

    while (1) {
        $token = &gst_tokenize_get_token ($in, $out);
        &gst_tokenize_remove_token ($out);
        if ($token eq $till_me) {
            return;
        }
    }
    
}

sub gst_tokenize_remove_till
{
    my ($out, $till_me) = @_;
    my @dont_remove;
    
    while (1) {
        my $removed = pop (@$out);

        $clean = $removed;
        $clean =~ s/\s//g;
        $clean =~ s/\#.*//;

        if ($clean eq "") {
            # Don't remove lines we ignore
            @dont_remove = ($removed, @dont_remove);
        }
        if ($clean eq $till_me) {
            push (@$out, $removed);
            @$out = (@$out, @dont_remove);
            return;
        }
    }
}

# -------------------------------- tokenizer  -------------------
sub gst_tokenize
{
    my ($file) = @_;
    my $buf = &gst_file_buffer_load ($file);

    my @out;
    my $out_index;
    
    while (@$buf) {
        my $line = $$buf[0];
        my $i = 0;

        if (&gst_ignore_line ($line)) {
            $out[$out_index++] = $line;
            shift @$buf;
            next;
        }
        
        # first skip all the white space/tabs
        my $line_copy = $line;
        $line_copy =~ s/^(\s+)//;
        # save what we removed for later
        my $leading = $1;
        
        # Now, handle token delimiters
        $line_copy =~ s/\;/ \; /g; # ";" is a token by itself
        $line_copy =~ s/,/ , /g;   # "," is a token by itself
        
        # Remove leading white space again, this time don't save it
        # since we added it ourselves
        $line_copy =~ s/^(\s+)//;

        # Get the first token in the string by splitting it
        my @list = split (' ', $line_copy);
        my $token = $list[0];
        
        # Add the leading white space that we removed 
        $token = $leading . $token;
        
        # Now remove our token from the buffer, to "eat" it from it
        $$buf[0] =~ s/$token//;
        
        # Tambet had this code, which looks right but when i run it
        # it behaves the same as the one above. It escapes the
        # pattern before running the s//
        if (0) {
            my $pattern = $token;
            $pattern =~ s/(\W)/\\$1/g;
            $$buf[0] =~ s/$pattern//;
        }
        
        # This is a special case, if what follows our token is a newline
        # we want it to be part of the token so that if we delete it we will
        # also delete the newline. If we don't do this, our files can build up
        # a lot of newlines. We can't do this above because split will consume
        # newlines.
        #
        # Use the remainder of the last operation where we remove $token from $buf
        $_ = $';
        # We have 2 matches posible
        # a) whitespace + newline and
        # b) ws + comment (with an implicit newline)
        if ((/^\s*\n/) || (/^\s*\#/)) {
            # If it matches, add it to our token and remove it from $buf
            $token = $token . $_ ;
            $$buf[0] =~ s/$_//;
        }
        
        $out[$out_index++] = $token;

        # If we removed all of the current line, shift buffer
        # so that in the next iteration $line is not empy
        if ($$buf[0] eq "") {
            shift @$buf;
            next;
        }
        
    }
    
    #construc the input file to compare it.
    if (0) {
        my $res = "";
        @copy = @out;
        while (@copy) {
            $res .= $copy[0];
            shift @copy;
        }
    }

    return \@out;
}

1;