package ACheck::Parser;


use strict;
use locale;
use Exporter;
use File::Spec::Functions;
use ACheck::Common;

use constant LAST_PARSED_LIST => "LAST_PARSED_LIST";		# special name
use constant RULES_DIR	=> "/usr/share/acheck/rules";		# rules directory
use constant RULES_E	=> "rules";				# rules files extension

use vars  qw(@ISA @EXPORT);

@ISA	= qw(Exporter);
@EXPORT	= qw(
	load_ruleset
	);

my @fields;					# parsed fields
my @values;					# parsed values
my @lines;					# corresponding line (for error message output)
my %lists;					# list of rule lists	name => hashref
my %rules;					# list of rules		name => hashref
my %valids;					# list of valids	name => hashref
my %comments;					# list of comments	name => hashref

my @LIST_FIELDS = qw(
	type
	name
	list
	rule
	comment
	spell
	);
my @LIST_REGEX = qw(
	test
	);
my @RULE_FIELDS = qw(
	type
	name
	valid
	hint
	fix
	);
my @RULE_REGEX = qw(
	regex
	);
my @VALID_FIELDS = qw(
	name
	);
my @VALID_REGEX = qw(
	pre
	in
	post
	);
my @COMMENT_FIELDS = qw(
	name
	start_offset
	stop_offset
	);
my @COMMENT_REGEX = qw(
	start
	skip
	stop
	);
my @END_FIELDS = qw(
	end_list
	end_rule
	end_comment
	end_valid
	);
my @FIELDS = (@LIST_FIELDS, @RULE_FIELDS, @VALID_FIELDS, @COMMENT_FIELDS, @LIST_REGEX, @RULE_REGEX, @VALID_REGEX, @COMMENT_REGEX, @END_FIELDS);

my %LIST_VALID = (
	'type' => [qw(
		loop until while
		)],
	'spell' => [qw(
		yes no
		)]
	);
my %RULE_VALID = (
	'type' => [qw(
		fix autofix warning error nop
		)]
	);
	

sub shift_0() {
	my $f = shift @fields;
	my $v = shift @values;
	my $l = shift @lines;

	return ($f, $v, $l);
}

sub get_0() {
	my $f = $fields[0];
	my $v = $values[0];
	my $l = $lines[0];

	return ($f, $v, $l);
}

sub valid($$$%) {
	my $f = shift;
	my $v = shift;
	my $l = shift;
	my %V = @_;

	foreach (keys %V) {
		next unless $f =~ /^$_$/i;
		foreach (@{ $V{$_} }) {
			return unless $v =~ /^$_$/;
		}
		suicide	__("%s: value not valid\n"), $l;
	}
}

sub escape_regex($) {
	$_ = shift;

	s/(\$\D)/\\$1/g;			# `$'	 prevent use of variables but `$1', `$2', ...
	s/\(\?\{/(\\?{/g;			# `(?{'	 prevent arbitrary code execution
	s/\(\?\?\{/(\\?\\?{/g;			# `(??{' prevent arbitrary code execution

	return $_;
}

sub parse_comment() {
	my $comment = {};			# new hashref
	my $f;					# field
	my $v;					# value
	my $l;					# line
	
	shift_0;

	$comment->{'type'} = "comment";

	while (@fields) {
		($f, $v, $l) = get_0;

		if	($f =~ /^(comment)|(valid)|(rule)|(list)$/i) {
			return $comment;
		} elsif	($f =~ /^end_comment$/i) {
			shift_0;
			return $comment;
		} elsif	($f =~ /^end_/i) {
			return $comment;
		} elsif (grep /^$f$/i, @COMMENT_REGEX) {
			shift_0;
			$comment->{$f} = escape_regex $v;
		} elsif (grep /^$f$/i, @COMMENT_FIELDS) {
			shift_0;
			$comment->{$f} = $v;
		} else	{
			suicide	__("%s: syntax error\n"), $l;
		}
	}

	return $comment;
}

sub parse_valid() {
	my $valid = {};				# new hashref
	my $f;					# field
	my $v;					# value
	my $l;					# line
	
	shift_0;

	while (@fields) {
		($f, $v, $l) = get_0;

		if	($f =~ /^(comment)|(valid)|(rule)|(list)$/i) {
			return $valid;
		} elsif	($f =~ /^end_valid$/i) {
			shift_0;
			return $valid;
		} elsif	($f =~ /^end_/i) {
			return $valid;
		} elsif (grep /^$f$/i, @VALID_REGEX) {
			shift_0;
			$valid->{$f} = escape_regex $v;
		} elsif (grep /^$f$/i, @VALID_FIELDS) {
			shift_0;
			$valid->{$f} = $v;
		} else	{
			suicide	__("%s: syntax error\n"), $l;
		}
	}

	return $valid;
}

sub parse_rule() {
	my $rule = {};				# new hashref
	my $f;					# field
	my $v;					# value
	my $l;					# line
	
	shift_0;

	while (@fields) {
		($f, $v, $l) = get_0;

		if	($f =~ /^(comment)|(rule)|(list)$/i) {
			return $rule;
		} elsif	($f =~ /^end_rule$/i) {
			shift_0;
			return $rule;
		} elsif	($f =~ /^end_/i) {
			return $rule;
		} elsif	($f =~ /^valid$/i && $v) {
			shift_0;
			push @{ $rule->{'valid'} }, $v;
		} elsif ($f =~ /^valid$/i) {
			$v = parse_valid;
			$valids{$v->{'name'} || ''} = $v;
			push @{ $rule->{'valid'} }, $v->{'name'} || $v;
		} elsif (grep /^$f$/i, @RULE_REGEX) {
			valid $f, $v, $l, %RULE_VALID;
			shift_0;
			$rule->{$f} = escape_regex $v;
		} elsif (grep /^$f$/i, @RULE_FIELDS) {
			valid $f, $v, $l, %RULE_VALID;
			shift_0;
			if ($f =~ /^fix$/) {
				push @{ $rule->{$f} }, escape_regex $v;
			} elsif ($f =~ /^hint$/) {
				push @{ $rule->{$f} }, $v;
			} else {
				$rule->{$f} = $v;
			}
		} else	{
			suicide	__("%s: syntax error\n"), $l;
		}
	}

	return $rule;
}

sub parse_list();
sub parse_list() {
	my $list = {};				# new hashref
	my $f;					# field
	my $v;					# value
	my $l;					# line
	
	shift_0;

	while (@fields) {
		($f, $v, $l) = get_0;

		if	($f =~ /^list$/i && $v) {
			shift_0;
			push @{ $list->{'rules'} }, $v;
		} elsif	($f =~ /^list$/i) {
			$v = parse_list;
			$lists{$v->{'name'} || ''} = $v;
			push @{ $list->{'rules'} }, $v->{'name'} || $v;
		} elsif	($f =~ /^end_list$/i) {
			shift_0;
			return $list;
		} elsif	($f =~ /^comment$/i && $v) {
			shift_0;
			push @{ $list->{'rules'} }, $v;
		} elsif	($f =~ /^comment$/i) {
			$v = parse_comment;
			$comments{$v->{'name'} || ''} = $v;
			push @{ $list->{'rules'} }, $v->{'name'} || $v;
		} elsif	($f =~ /^rule$/i && $v) {
			shift_0;
			push @{ $list->{'rules'} }, $v;
		} elsif ($f =~ /^rule$/i) {
			$v = parse_rule;
			$rules{$v->{'name'} || ''} = $v;
			push @{ $list->{'rules'} }, $v->{'name'} || $v;
		} elsif (grep /^$f$/i, @LIST_REGEX) {
			valid $f, $v, $l, %LIST_VALID;
			shift_0;
			$list->{$f} = escape_regex $v;
		} elsif (grep /^$f$/i, @LIST_FIELDS) {
			valid $f, $v, $l, %LIST_VALID;
			($f =~ /^name$/i)	 &&
			($v eq LAST_PARSED_LIST) &&
				(suicide __("%s: name %s\nreserved word\n"), $l, LAST_PARSED_LIST);
			shift_0;
			$list->{$f} = $v;
		} else	{
			suicide	__("%s: syntax error\n"), $l;
		}
	}

	return $list;
}

sub parse() {
	my $f;					# field
	my $v;					# value
	my $l;					# line
	
	while (@fields) {
		($f, $v, $l) = get_0;

		if	($f =~ /^list$/i ) {
			$v = parse_list;
			$lists{LAST_PARSED_LIST} = $v;
			$lists{$v->{'name'} || ''} = $v;
		} elsif	($f =~ /^rule$/i) {
			$v = parse_rule;
			$rules{$v->{'name'} || ''} = $v;
		} elsif	($f =~ /^valid$/i) {
			$v = parse_valid;
			$valids{$v->{'name'} || ''} = $v;
		} elsif	($f =~ /^comment$/i) {
			$v = parse_comment;
			$comments{$v->{'name'} || ''} = $v;
		} elsif	($f =~ /^end_(list)|(rule)|(comment)|(valid)$/i ) {
			shift_0;
		} else	{
			suicide	__("%s: syntax error\n"), $l;
		}
	}

	foreach (values %rules) {
		next unless exists $_->{'valid'};
		for (my $i = 0; $i < @{ $_->{'valid'} }; $i++) {
			next if ref $_->{'valid'}[$i];
			$_->{'valid'}[$i] = $valids{$_->{'valid'}[$i]} ||
				suicide __("unable to resolve %s\nundefined name as `valid' rule\n"), $_->{'valid'}[$i];
		}
			
	}
	
	foreach (values %lists) {
		next unless exists $_->{'rules'};
		for (my $i = 0; $i < @{ $_->{'rules'} }; $i++) {
			next if ref $_->{'rules'}[$i];
			my $many;
			$many  = ($rules{$_->{'rules'}[$i]} && $lists   {$_->{'rules'}[$i]}) || 0;
			$many |= ($rules{$_->{'rules'}[$i]} && $comments{$_->{'rules'}[$i]}) || 0;
			$many |= ($lists{$_->{'rules'}[$i]} && $comments{$_->{'rules'}[$i]}) || 0;
			$many && suicide __("unable to resolve %s\nname defined more than once as `rule', `list' and `comment'\n"), $_->{'rules'}[$i];
			$_->{'rules'}[$i] = $rules   {$_->{'rules'}[$i]} ||
					    $lists   {$_->{'rules'}[$i]} ||
					    $comments{$_->{'rules'}[$i]} ||
				suicide __("unable to resolve %s\nundefined name as `rule', `list' or `comment'\n"), $_->{'rules'}[$i];
		}
	}

	my $r;
	   $r->[0] = $lists{'root'} || $lists{''} || $lists{LAST_PARSED_LIST};
	return $r;
}

sub load_one_rulefile($$);
sub load_one_rulefile($$) {
	my $ruleset = shift;					# set
	my $filetype = shift;					# type

	debug 1, "ruleset:      `$ruleset'\n".
		 "filetype:     `$filetype'\n";

	my $filename = catfile(RULES_DIR, $ruleset, "$filetype.".RULES_E);

	my @file;					# file lines
	my $longline;					# temp value for multiline parsing
	my $l = 0;					# line counter

	my $handle;					# file handle
	$handle = new IO::File($filename, '<')	or suicide __("Cannot read `%s': %s\n"), $filename, $!;
	@file = $handle -> getlines;
	$handle -> close;
	chomp @file;

	while (@file) {
		$_ = shift @file;
		$l++;

		if (/^#include\s+(\S+)/) {
			load_one_rulefile($ruleset, $1);
			next;
		}

		s/(?<!\\)#.*//;						# remove comment
		s/^\s*//;						# remove head spaces
		s/\s*$//;						# remove tail spaces

		next if /^$/;						# skip empty line

		s/\\#/#/g;						# un-escape `#'
		if (/\\$/) {						# look for multiline
			chop;
			$longline .= $_;
			next;
		}

		if ($longline) {					# last line of a multiline
			$longline .= $_;
			$_ = $longline;
			undef $longline;
		}

		my $f;
		my $v;
		($f, $v) = split (/\s+/, $_, 2);

		if ($v	       &&
		    $v =~ /^"/ &&
		    $v =~ /"$/   ) {					# remove leading and trailing `"'
			$v =~ s/^"//;
			$v =~ s/"$//;
		}

		suicide(__("%s/%s:%s: `%s', unknown field name\n"), $ruleset, $filetype, $l, $_)
			unless grep(/^$_$/, @FIELDS);			# stop if unknown field name

		push @fields, $f;
		push @values, $v;
		push @lines, "$ruleset\:$l";
	}
}

sub load_ruleset ($$) {
	load_one_rulefile(shift, shift);

	return parse;
}

1;