#!/usr/bin/perl -w 
#
# $Id: sqlcheck.pl,v 1.4.4.1 2001/12/18 08:12:30 netsabes Exp $
#
# sqlcheck.pl:  check SQL code within daCode .php3 files
#
# This script parses PHP code from src/phplib
# to produce a report of usage of tainted vars
# in sql commands.
#
# For usage, see:	$ sqlcheck.pl --help
# or 				$ perldoc sqlcheck.pl
#


## params

  use Getopt::Long;

  # switches for output, by default, are:
  my $showfiles = 0;
  my $showlines = 0;
  my $shortcut  = 1;
  my $showtainted = 1;

  %options= ( 
	'Functions'   => \$pars_func ,
	'Escapes'     => \$pars_escape ,
	'Sqlcommands' => \$pars_sql ,
	'Dbqueries'   => \$pars_dbquery ,
	'Validates'   => \$pars_coder ,
	'Templates'   => \$pars_tmpl ,

	'help'    => \$help ,
	'verbose' => \$verbose ,
	
	'filename!' => \$showfiles ,
	'linenumb!' => \$showlines ,
	'shortcut!' => \$shortcut ,
	'warnings!' => \$showtainted ,
	);
  $Getopt::Long::ignorecase = 0;
  GetOptions(%options);


  # Friendly help splash screen appears
  # when none parse criteria was choosen.
  $pars_func=$pars_escape=$pars_sql=$pars_dbquery=$pars_coder=$pars_tmpl=
  $showfiles=$showlines=$showtainted=$shortcut=1 if $verbose;
  $help = 1 unless $pars_func or $pars_escape or $pars_sql
                or $pars_dbquery or $pars_coder or $pars_tmpl;
  
  if (defined($help)) { 
    print "sqlchek.pl: parses phplib source and prints warnings\n";
    print "Usage: Choose one or more item to parse in the list below:\n"; 
    foreach $opt (sort {\$options{$a} cmp \$options{$b}}keys %options) {
      print "option ";
      if ($opt =~ /help/) {
        print "[--help]      Prints this message...\n";
      } elsif ($opt =~ /verbose/) {
        print "[--verbose]   Prints all messages...\n";
      } elsif ($opt =~ s/!$//) {
        print "[-".substr($opt,0,1)."] ";
        printf "%-30s Switch on/off output of `$opt'\n", "[--$opt] [--no$opt]";
      } else {
        print "[-".substr($opt,0,1)."] ";
        printf "%-30s Will parse `$opt'\n", "[--$opt]";
      }
    }
	print "Please, look at man page for more details: perldoc $0\n";
    exit;
  }


## main

  # where is the code ?
  my $path = `find . .. -name phplib | head -1`;
  chop $path;
  chdir $path or die "phplib not found\n";

  my $DELIMITER = ':'; # will be separating files, lines and code...


  $reftheme = "linuxfr";

  # all daCode calls from the template linuxfr are assumed secure.
  if ($pars_tmpl) {
    foreach $filename (<themes/$reftheme/*.tmpl>) {
      $line_number = 0;
      open(IN, "<$filename");
	  while ($current = <IN>) {
        $line_number++;
        if ($pars_tmpl && $current =~ m,<!-- daCode:\s*(.*?)\s*-->,) {
		  print $filename.$DELIMITER  if $showfiles;
		  print $line_number.$DELIMITER  if $showlines;
		  print $current;
		  $command = $1;
		  $command =~ s/[0-9]+(,[0-9]+)*/7/g;
		  unless (defined($linuxfr{$command}) && $linuxfr{$command} eq "Model") {
			$linuxfr{$command} .= "Model";
  		    print "theme $reftheme taints code $command in: Model\n";
		  }
		}
	  }
	}
  }
		

  # processing each file of the code
  @phplib_files = (<themes/*/*.tmpl>) if $pars_tmpl;
  @phplib_files = (@phplib_files , <*.php3>);

  foreach $filename (@phplib_files) {
    $line_number = 0;
    open(IN, "<$filename");

    # processing each line of the file
    while ($current = <IN>) {
      $line_number++;
      $assignment = 0;

      if ($pars_tmpl && $filename =~ /\.tmpl/ && $current =~ m,<!-- daCode:\s*(.*?)\s*-->,) {
		print $filename.$DELIMITER  if $showfiles;
		print $line_number.$DELIMITER  if $showlines;
		print $current;
		$command = $1;
		$command =~ s/[0-9]+(,[0-9]+)*/7/g;
		if ($linuxfr{$command}) {
			print "$reftheme taints code $command in: Model\n";
		} else {
			print "theme taints code $command in: Rand\n";
		}
		
      } elsif ($pars_coder && $current =~ m://.*sqlchecked:) {
        $assignment = 'daCoder';
        $get = &multi_lines;
        print $get;
        &parsing_escapes($get);

      } elsif ($pars_sql && $current =~ /(\$(sqlc_.)\s*(\.?=))/) {
        $varname = $2;
        $assignment = $3 eq '=' ? 'constructor' : 'modifier';
        if ($assignment eq 'constructor') {
          if ($showtainted) {
            print "following constructor taints $varname in: Blank";
            print ", but was yet tainted ?" if $taint{$varname} ;
            print "\n";
          }
          $taint{$varname} = "Constructed";
        }
        $get = &multi_lines;
        print $get;
        &parsing_sql($varname,$assignment,$get);

      } elsif ($pars_escape && $current =~ /addslashes/) {
        $get = &multi_lines;
        print $get;
        &parsing_escapes($get);

      } elsif ($pars_dbquery && $current =~ /db->query/) {
        $get = &multi_lines;
        print $get;
        &parsing_db($get);

      } elsif      ($pars_func 
                 && $current =~ /^\s*Function/
                 && $current =~ s/{/;/) {
        &reset_taints;
        $get = &multi_lines;
        print $get;
        &parsing_params($get); # taint all the params
      }
    }
    close(IN);
  }
exit;


## functions
  
  # One PHP instruction can be split on multiples lines.
  # So we try to load it by joining all the related lines.
  # The special char `;' at the end should tell us when finished.
  sub multi_lines {
    my $buffer = '';
    chop $current;
    $current =~ s/^\s+//;       # suppress spaces at begin
    unless ($pars_coder && $current =~ m:\s*//.*sqlchecked:) {
      $current =~ s:\s*//.*::;    # and comments at the end.
    }
    $current =~ s/\$this->db->(compat_[a-z_]+)\(.*?\)/_$1\_/;
    $current =~ s/\$config->tables\['(\w+)'\]/_$1_/g    if $shortcut;
    print $filename.$DELIMITER  if $showfiles;
    print $line_number.$DELIMITER  if $showlines;
    while       ($current !~ /;\s*$/
             && ($buffer .= $current)
             && ($current = <IN>)) {
      chop $current;
      $current =~ s/^\s+//;
      $current =~ s:\s*//.*::;
      $current =~ s/\$this->db->(compat_[a-z_]+)\(.*?\)/_$1\_/;
      $current =~ s/\$config->tables\['(\w+)'\]/_$1_/g  if $shortcut;
      $line_number++;
    }
    return "$buffer$current\n";
  }

  sub reset_taints {
    foreach my $v (keys %taint) {
    print "following function taints $v in: Blank\n"     if $showtainted;
    }
    %taint = ();
  }

  sub parsing_params {
    my ($instruction) = @_;
    while ($instruction =~ s/[(,]\s*\$(\w+)/;/) {
      $taint{$1} = "Param";
      print "function taints $1 in: Param\n"             if $showtainted;
    }
  }

  sub parsing_escapes {
    my ($instruction) = @_;
    if ($assignment eq 'daCoder') {
      $instruction =~ /\$(.*?)\s*=/;
      $taint{$1} = "Checked";
      print "daCoder taints $1 in: Checked\n"   if $showtainted;
    
    } elsif ($instruction =~ /\$(.*?)\s*=\s*addslashes\(.*\)/) {
      $taint{$1} = "Addslashed";
      print "assignment taints $1 in: Addslashed\n"      if $showtainted;
    }
  }
  
  sub parsing_db {
    my ($instruction) = @_;
    if ($instruction =~ /db->query\s*\(\s*(".*"|\$sqlc_.)\s*\)/) {
      $sql = $1;
      $sql =~ s/^\$//;
      %{"flag$sql"} = (Hardcoding => 1) unless %{"flag$sql"};
      print $filename.$DELIMITER  if $showfiles;
      print "query taints database by $command{$sql} $sql in: ".
             join(',' , sort keys %{"flag$sql"})."\n" if $showtainted;
      print "query taints $sql in: Blank\n"              if $showtainted;
      undef $taint{$sql}; ### TODO: c pas correct
      %{"flag$sql"} = ();
    }
  }

  sub parsing_sql {
    my ($var , $assignment, $fragment) = @_;
    if ($assignment eq 'constructor') {
      $command{$var} = ($fragment =~ /(select|insert|update|delete)/i ? lc($1) : 'unknown');
    }
    $fragment =~ s/.*?sqlc_//;
    $fragment =~ s/.*?(addslashes\(?\$)/$1/;
    while ($fragment =~ s/^.*?((addslashes\()?\$([->0-9A-Za-z_]+(\[.*?\])?))//) {
      my ($slash , $name) = ($1, $3);
      if ($slash =~ /^addslashes\(/) {
        $taint_flag = 'Addslashed';
      } elsif ($taint{$name}) {
        $taint_flag = $taint{$name}
      } else {
        $taint_flag = 'Rand';
      }
      #$taint_flag = ($taint{$name} ? $taint{$name} : "Rand");
      print "$assignment taints $var by $name in: $taint_flag\n" if $showtainted;
      if ($var =~ /^sqlc/ ) {
        ${"flag$var"}{$taint_flag} = 1;
      }
      #print "$fragment ---------\n"
    }
  }


##EOF##
__END__

=head1 NAME

sqlcheck.pl - phplib source parser

=head1 SYNOPSIS

B<sqlcheck.pl> [-F][-E][-S][-D][-C][-V] [-f][-l][-s][-w] 

B<sqlcheck.pl> {--help,--verbose}

=head1 DESCRIPTION

This script parses PHP code stored in I<src/phplib> for
security audit. Distinct interesting lines of code can be
choosen with uppercases options. These lines are then submitted
to an interpreter that try to detects taints on objects.
The interpreter is able to report taints of an objects on
another. Alarms are printed. You can control output with
lowercases options.
Simply try the I<--verbose> mode to see all da bugs. ;)

=head1 OPTIONS

=head2 I<Generic options>

=item B<-h>, B<--help>

Display options sumary and exit.

=item B<-v>, B<--verbose>

Prints all messages. Activates all options.

=head2 I<Output options>

These switches control the output of the script.
They are lowercases.

=item B<-f>, B<--filename> , B<--nofilename>

Switch on/off the display of filenames in code.

=item B<-l>, B<--linenumb> , B<--nolinenumb>

Switch on/off the display of line numbers in code.

=item B<-s>, B<--shortcut>, B<--noshortcut>

Switch on/off the display of shortcuts in lines.
Some part of command of daCode could be compressed to
improve readability. The compressed code resulting will be
displayed as underscored: B<_compressed_code_>.

=item B<-w>, B<--warnings>, B<--nowarnings>

Switch on/off the display of warnings computed by the interpreter.
The script executes an interpreter which parses each line
selected and prints different warnings.

=head2 I<Input options>

These options controls the input of the script.
They are uppercases.

=item B<-F>, B<--Functions>

Selects and interprets Functions declarations.

=item B<-E>, B<--Escapes>

Selects and interprets escapes. Actually it is only I<addslashes()>

=item B<-S>, B<--Sqlcommands>

Selects and interprets Sql commands constructs made by assignments.

=item B<-D>, B<--Dbqueries>

Selects and interprets calls to I<dbquery()> which read or write
in the database.

=item B<-V>, B<--Validates>

Selects and interprets lines of code which are said I<checked> by
some developper in the source. He wrote a comment saying that one
object is concerned and eventually he can add the reason, his name,
and the date of his check. This is usefull to avoid multiples revisions
when the script failed its parse and prints obvious alarms.

=item B<-T>, B<--Templates>

Selects and interprets templates code. The theme I<linuxfr> is assumed
to be valid. The interpreter makes this assumption to detect deviance
in others themes.

=head1 INTERPRETER INTERNALS

The interpreter just try to say some infos on objects that are submitted.
This info is base on taints. The interpreter can taints object with differents
colors. At this time, colors are
(Rand, Param, Blank, Model, Checked, Addslashed, Hardcoding, Constructed).
Rand and Param colors should be carefully examined.
Other colors are more reliable.

=head1 SEE ALSO

I<doc/txt/devel.txt> will inform about conventions to write code
compliant with this script.

=head1 BUGS

=item Parsing function call

It's not easy for the parser to deal with function call because 
parenthesis are not trivials. for instance,
I<< addslashes($this->foo($bar),$foobar) >> will create obvious dependencies.

=item Parsing PHP structures

The parser cannot detect well tests like this one: 
  1:  if ($test) {
  2:    $sqlc_q = "select ".$foo;
  3:  } else {
  4:    $sqlc_q = "insert ".$bar;
  5:  }

Lines 1, 3 and 5 are not view by the parser. So, it first see
line 2 and say: `sqlc_q depends on foo... fine'. Then, it see line 4
and add: `sqlc_q now not rely on foo... Strange'.
This error is detected and causes an alarm C<<
following constructor taints sqlc_q in: Blank, but was yet tainted ?
>>. However, taints are not very reliable: At line 5, $sqlc_q is
supposed to rely on $bar, and not on $foo.

=head1 TODO

=item Taints variables read from database.

Some fields of the Db must be escaped, but not all. If the script could
detect these inputs, it will prints less warnings.

=head1 AUTHOR

frederic loze

flux B<at> nexen B<dot> net

=cut
