#!/usr/bin/perl -w

# bindgraph -- a BIND statistics rrdtool frontend
# copyright (c) 2003 Marco Delaurenti <dela@linux.it>
# copyright (c) 2003 Marco d'Itri <md@linux.it>
# based on mailgraph (c) David Schweikert <dws@ee.ethz.ch>
# Released under the terms of the GNU General Public License.

# Width in seconds of one RRD step; update() also groups queries into
# buckets of this size before writing them out.
my $rrdstep = 60;

# Defaults used by daemonize(); each can be overridden from the command line
# (--daemon_log, --daemon_rrd, --daemon_pid).
my $daemon_logfile = '/var/log/bindgraph.log';
my $daemon_rrd_dir = '/var/lib/bindgraph';
my $daemon_pidfile = '/var/run/servergraph/bindgraph.pid';
# Name of the RRD file (overridable with --rrd-name). It is a relative path:
# daemonize() changes into $daemon_rrd_dir before the file is used.
my $rrd = 'bindgraph.rrd';

# Query types counted separately, one RRD data source each. Any type not
# listed here is accounted under the catch-all '_other_' entry.
my @query_t = qw(TKEY SOA PTR A AAAA CNAME MX NS ANY _other_);

##############################################################################
##############################################################################
# I'm embedding Parse::Syslog not to help lazy admins but because BIND
# logs cannot be parsed by the standard version.

package Parse::Syslog;

use Carp;
use Symbol;
use Time::Local;
use strict;
use vars qw($VERSION);

# version of the embedded Parse::Syslog code
$VERSION = '0.05';

# Maps the three-letter month abbreviation found in a log line to the
# zero-based month number expected by Time::Local. Both the capitalized and
# the all-lowercase spellings are accepted.
my %months_map = (
    'Jan' => 0, 'Feb' => 1, 'Mar' => 2,
    'Apr' => 3, 'May' => 4, 'Jun' => 5,
    'Jul' => 6, 'Aug' => 7, 'Sep' => 8,
    'Oct' => 9, 'Nov' =>10, 'Dec' =>11,
    'jan' => 0, 'feb' => 1, 'mar' => 2,
    'apr' => 3, 'may' => 4, 'jun' => 5,
    'jul' => 6, 'aug' => 7, 'sep' => 8,
    'oct' => 9, 'nov' =>10, 'dec' =>11,
);

# fast timelocal: str2time() remembers the epoch value of midnight of the
# day it converted last, so that further timestamps of the same day only
# need an addition instead of a full timegm()/timelocal() call.
my $str2time_last_time;   # epoch of 00:00:00 of the cached day
my $str2time_last_day;
my $str2time_last_month;
my $str2time_last_year;
# Year-decrement guard used by the next() methods: it is set when a January
# line is seen and cleared by a line of any month from February to November.
# While it is set, a December line that follows a non-December line moves
# the current year back by one.
my $enable_year_decrement = 1;

# Convert broken-down time fields to a unix timestamp.
#
# Arguments, in this order:
#   0: sec, 1: min, 2: h, 3: day, 4: month (0-11), 5: year (minus 1900),
#   6: GMT flag -- true to interpret the fields as UTC, false for local time
#
# The cache is keyed on day, month AND year: matching on day and month only
# would hand out a timestamp of the wrong year when the same calendar day of
# another year is converted next.
sub str2time($$$$$$$)
{
    my $GMT = pop @_;
    my $day_secs = $_[2]*3600+$_[1]*60+$_[0];

    if(defined $str2time_last_time
        and $_[3] == $str2time_last_day
        and $_[4] == $str2time_last_month
        and $_[5] == $str2time_last_year)
    {
        return $str2time_last_time + $day_secs;
    }

    # @_ now holds exactly the six fields Time::Local expects
    my $time = $GMT ? timegm(@_) : timelocal(@_);

    $str2time_last_time  = $time - $day_secs;
    $str2time_last_day   = $_[3];
    $str2time_last_month = $_[4];
    $str2time_last_year  = $_[5];

    return $time;
}

# Constructor.
#
#   $class  class to bless the parser into
#   $file   either a File::Tail object to follow or the name of a file to read
#   %data   options stored in the object; "year" defaults to the current
#           year when it is missing or undefined
#
# Croaks when $file is undefined or when the named file cannot be opened.
sub new($$;%)
{
    my ($class, $file, %data) = @_;
    croak "new() requires one argument: file" unless defined $file;

    if(not defined $data{year}) {
        $data{year} = (localtime(time))[5]+1900;
    }
    $data{_repeat}=0;

    if(ref $file eq 'File::Tail') {
        $data{filetail} = 1;
        $data{file} = $file;
    }
    else {
        # three-argument open: the name is used literally, so surrounding
        # whitespace or mode characters in it cannot change what is opened
        open(my $fh, '<', $file) or croak "can't open $file: $!";
        $data{file} = $fh;
    }

    return bless \%data, $class;
}

# Fetch the next raw line from the input: through the read() method when a
# File::Tail object is being followed, through a plain readline otherwise.
sub _next_line($)
{
    my ($parser) = @_;
    my $source = $parser->{file};

    return defined $parser->{filetail} ? $source->read : <$source>;
}

# Return the next parsed syslog record, or undef when the input is exhausted.
#
# A record is an array ref (timestamp, host, program, pid, text) when the
# object was created with a true "arrayref" option, otherwise a hash ref with
# the keys timestamp, host, program, pid, msgid, facility, level and text.
# Lines that do not look like syslog output are reported with carp and
# skipped; "-- MARK --" lines are skipped silently. An unknown month name is
# fatal (croak).
sub next($)
{
    my ($self) = @_;

    # a "last message repeated N times" line is expanded by handing out the
    # previous record N times: the first copy right away, the rest here
    if($self->{_repeat}>0) {
        $self->{_repeat}--;
        return $self->{_repeat_data};
    }

    # test for definedness: a line consisting of just "0" is false but is
    # still input and must not end the parsing
    line: while(defined(my $str = $self->_next_line())) {
        # date, time and host
        $str =~ /^
            (\w{3})\s+(\d+)   # date  -- 1, 2
            \s
            (\d+):(\d+):(\d+) # time  -- 3, 4, 5
            (?:\.\d{3})?      # XXX ms, added by Md for BIND logs
            \s
            ([-\w\.]+)        # host  -- 6
            \s+
            (.*)              # text  -- 7
            $/x or do
        {
            carp "line not in syslog format: $str";
            next line;
        };
        my ($mon_name, $day, $hh, $mm, $ss, $host, $text)
            = ($1, $2, $3, $4, $5, $6, $7);

        my $mon = $months_map{$mon_name};
        defined $mon or croak "unknown month $mon_name\n";

        # year change
        if($mon==0) {
            $self->{year}++ if defined $self->{_last_mon} and $self->{_last_mon} == 11;
            $enable_year_decrement = 1;
        }
        elsif($mon == 11) {
            if($enable_year_decrement) {
                $self->{year}-- if defined $self->{_last_mon} and $self->{_last_mon} != 11;
            }
        }
        else {
            $enable_year_decrement = 0;
        }

        $self->{_last_mon} = $mon;

        # convert to unix time
        my $time = str2time($ss,$mm,$hh,$day,$mon,$self->{year}-1900,$self->{GMT});

        # last message repeated ... times
        if($text =~ /^last message repeated (\d+) time/) {
            my $count = $1;
            # expansion can be switched off with a defined but false "repeat"
            next line if defined $self->{repeat} and not $self->{repeat};
            next line if not defined $self->{_last_data}{$host};
            $count > 0 or do {
                carp "last message repeated 0 or less times??";
                next line;
            };
            $self->{_repeat}=$count-1;
            $self->{_repeat_data}=$self->{_last_data}{$host};
            return $self->{_last_data}{$host};
        }

        # marks
        next if $text eq '-- MARK --';

        # some systems send over the network their
        # hostname prefixed to the text. strip that.
        # (the name is quoted: its dots must not act as regex wildcards)
        $text =~ s/^\Q$host\E\s+//;

        $text =~ /^
            ([^:]+?)        # program   -- 1
            (?:\[(\d+)\])?  # PID       -- 2
            :\s+
            (?:\[ID\ (\d+)\ ([a-z0-9]+)\.([a-z]+)\]\ )?   # Solaris 8 "message id" -- 3, 4, 5
            (.*)            # text      -- 6
            $/x or do
        {
            carp "line not in syslog format: $str";
            next line;
        };
        my ($program, $pid, $msgid, $facility, $level, $msg)
            = ($1, $2, $3, $4, $5, $6);

        # the record is also remembered per host, for a later "last message
        # repeated" line from the same host
        if($self->{arrayref}) {
            $self->{_last_data}{$host} = [
                $time,     # 0: timestamp
                $host,     # 1: host
                $program,  # 2: program
                $pid,      # 3: pid
                $msg,      # 4: text
                ];
        }
        else {
            $self->{_last_data}{$host} = {
                timestamp => $time,
                host      => $host,
                program   => $program,
                pid       => $pid,
                msgid     => $msgid,
                facility  => $facility,
                level     => $level,
                text      => $msg,
            };
        }

        return $self->{_last_data}{$host};
    }
    return undef;
}

##############################################################################
# this code parses the querylog of BIND 9.3

package Parse::Log::Bind93;
use base qw(Parse::Syslog);
use Carp;

# Return the next record of a BIND 9.3 query log, or undef when the input is
# exhausted.
#
# The record is an array ref laid out like the "arrayref" records of
# Parse::Syslog::next: (timestamp, host, program, pid, text), with host,
# program and pid always undef. Lines that are too short to hold a timestamp
# or that carry an unknown month name are reported with carp and skipped.
sub next($)
{
	my ($self) = @_;

	# test for definedness: a line consisting of just "0" is false but is
	# still input and must not end the parsing
	LINE: while (defined(my $str = $self->_next_line)) {
		# The fields are cut out at fixed offsets which assume a
		# "DD-Mon-YYYY HH:MM:SS" prefix, i.e. at least 20 characters.
		# Shorter lines would only produce "substr outside of string"
		# warnings, so they are rejected up front.
		if (length($str) < 20) {
			carp "line too short for the BIND 9.3 log format: $str";
			next LINE;
		}

		# The year at offset 7 is not extracted: the year in use is the one
		# tracked in $self->{year}.
		my ($d, $m, $hh, $mm, $ss) = (
			substr($str, 0, 2), substr($str, 3, 3),
			substr($str, 12, 2), substr($str, 15, 2), substr($str, 18, 2),
		);
		# the text starts after the first space found at or after offset 35;
		# when there is none, index() yields -1 and the whole line is kept
		my $text = substr($str, index($str, ' ', 35) + 1);
		chomp $text;

		my $mon = $months_map{$m};
		if (not defined $mon) {
			carp "unknown month '$m' found while parsing: $str\n";
			next LINE;
		}

		# year change
		if ($mon == 0) {
			$self->{year}++ if defined $self->{_last_mon}
				and $self->{_last_mon} == 11;
			$enable_year_decrement = 1;
		} elsif ($mon == 11) {
			if ($enable_year_decrement) {
				$self->{year}-- if defined $self->{_last_mon}
					and $self->{_last_mon} != 11;
			}
		} else {
			$enable_year_decrement = 0;
		}

		$self->{_last_mon} = $mon;

		# convert to unix time
		my $time = Parse::Syslog::str2time($ss, $mm, $hh, $d, $mon,
			$self->{year}-1900, $self->{GMT});

		return [
			$time,	# 0: timestamp
			undef,	# 1: host
			undef,	# 2: program
			undef,	# 3: pid
			$text,	# 4: text
		];
	}
	return undef;
}


##############################################################################
##############################################################################

package main;
use RRDs;
use File::Tail;
use Getopt::Long;
use POSIX 'setsid';
use strict;

# version of bindgraph itself, printed by --version
my $VERSION = '0.2';

# global variables
my $year;			# NOTE(review): not referenced by any code visible in this file
my $this_minute;	# start of the $rrdstep-wide bucket being accumulated
# per-type query counters for the current bucket, flushed by update()
my %sum = map { $_ => 0 } @query_t;
# lookup set of the known query types, used to map the rest to '_other_'
my %exist_qt = map { $_ => 1 } @query_t;

my $rrd_inited = 0;			# checked by update() to decide whether to call init_rrd()
my $verbose = 0;			# verbosity level, raised by each -v
my $ignore_localhost = 0;	# set by --ignore-localhost

# prototypes: declared up front because the subs are defined below their
# first use
sub daemonize();
sub process_line($);
sub event_query($$);
sub init_rrd($);
sub update($);

# run the program; main() only returns once the parser has no more input
main();
exit 0;

# Program entry point: parse the command line, optionally detach from the
# terminal, then feed every record of the log file to process_line().
# With --cat the log file is read once, otherwise it is followed with
# File::Tail. Exits with status 1 on invalid options or an unknown --format.
sub main {
	my %opt;

	Getopt::Long::Configure('no_ignore_case');
	GetOptions(\%opt, 'help|h', 'cat|c', 'logfile|l=s', 'version|V',
		'year|y=i', 'host=s', 'verbose|v+', 'daemon|d!', 'format=s',
		'daemon_pid=s', 'ignore_localhost|ignore-localhost',
		'daemon_rrd=s', 'rrd_name|rrd-name=s', 'daemon_log=s'
	) or exit 1;
	usage() if $opt{help} or @ARGV;

	if ($opt{version}) {
		print STDERR "bindgraph $VERSION by {dela,md}\@linux.it\n";
		exit 0;
	}

	$verbose = $opt{verbose} if $opt{verbose};
	$ignore_localhost = 1 if $opt{ignore_localhost};

	$daemon_pidfile = $opt{daemon_pid} if $opt{daemon_pid};
	$daemon_logfile = $opt{daemon_log} if $opt{daemon_log};
	$daemon_rrd_dir = $opt{daemon_rrd} if $opt{daemon_rrd};
	$rrd		= $opt{rrd_name}.".rrd" if defined $opt{rrd_name};
	daemonize() if $opt{daemon};

	my $host = $opt{host};
	my $logfile = $opt{logfile} ? $opt{logfile} : '/var/log/syslog';
	my $file;
	if ($opt{cat}) {
		$file = $logfile;
	} else {
		$file = File::Tail->new(name => $logfile, tail => -1);
	}

	# an undefined year makes the parser fall back to the current year
	my @parser_args = ($file, year => $opt{year}, arrayref => 1);
	my $parser;
	if (not $opt{format} or $opt{format} eq 'bind93') {
		# Bind93 records carry no host field to compare against
		if ($host) {
			print STDERR "--format=bind93 and --host are not compatible.\n";
			exit 1;
		}
		$parser = Parse::Log::Bind93->new(@parser_args);
	} elsif ($opt{format} eq 'bind92') {
		$parser = Parse::Syslog->new(@parser_args);
	} else {
		print STDERR "Unknown log format '$opt{format}'.\n";
		exit 1;
	}

	while (my $sl = $parser->next) {
		if ($host) {
			next if $sl->[1] ne $host;
			next if $sl->[2] ne 'named' and $sl->[2] ne 'client';
		}
		# The client shows up as "address#port" in the record itself, so
		# that is where localhost has to be looked for (the --host option
		# value says nothing about the client). Both the program and the
		# text field are searched; either may be undef.
		next if $ignore_localhost
			and grep { defined $_ and /(?<![\d.])127\.0\.0\.1\#/ }
				@{$sl}[2, 4];
		process_line($sl);
	}
}

# Account for one parsed log record (array ref: element 0 is the timestamp,
# element 4 the message text). Records whose text holds no query are
# dropped, with a diagnostic on STDERR in verbose mode.
sub process_line($) {
	my ($sl) = @_;

	# the capture is the query type, i.e. the word following "IN"
	my ($qtype) = $sl->[4] =~ /query:\s+\S+\s+IN\s+(\S+)/;
	unless (defined $qtype) {
		print STDERR "Cannot parse this line: $sl->[4]\n" if $verbose;
		return;
	}

	# types without a counter of their own share the catch-all one
	$qtype = '_other_' unless $exist_qt{$qtype};
	event_query($sl->[0], $qtype);
}

# Count one query of type $type seen at time $t, unless update() reports
# that the timestamp is older than the bucket being accumulated.
sub event_query($$) {
	my ($when, $qtype) = @_;

	if (update($when)) {
		$sum{$qtype}++;
	}
}

# returns 1 if $sum should be updated
# Move the accumulation window forward to the bucket containing time $t.
# When $t lies beyond the current bucket, the counters gathered so far are
# written to the RRD, every bucket skipped in between is written as all
# zeroes, and the counters are reset. Dies when rrdtool reports an error.
# returns 1 if $sum should be updated, 0 if $t is older than the current
# bucket
sub update($) {
	my ($t) = @_;

	# start of the $rrdstep-wide bucket $t falls into
	my $bucket = $t - $t % $rrdstep;
	init_rrd($bucket) unless $rrd_inited;

	return 1 if $bucket == $this_minute;
	return 0 if $bucket < $this_minute;

	# flush the finished bucket: "timestamp:count:count:..."
	my $record = join(':', $this_minute, @sum{@query_t});
	print STDERR "update $record\n" if $verbose > 1;
	RRDs::update($rrd, $record);
	my $err = RRDs::error;
	die "RRDs::update($rrd, ...): $err" if $err;

	# buckets without any query between the flushed one and the new one
	my $gap = $this_minute + $rrdstep;
	while ($gap < $bucket) {
		my $filler = join(':', $gap, (0) x @query_t);
		print STDERR "update $filler (SKIP)\n" if $verbose > 1;
		RRDs::update($rrd, $filler);
		$err = RRDs::error;
		die "RRDs::update($rrd, ...): $err" if $err;
		$gap += $rrdstep;
	}

	$this_minute = $bucket;
	@sum{@query_t} = (0) x @query_t;
	return 1;
}

# Prepare the RRD file and the accumulation window; $m is the start of the
# bucket of the first record seen.
# An existing file is reused and counting resumes one step after its last
# update; otherwise a new file starting one step before $m is created.
# Either way $rrd_inited is set, so that update() calls this only once.
# Dies when rrdtool reports an error.
sub init_rrd($) {
	my ($m) = @_;
	my $err;

	if (-f $rrd) {
		my $last = RRDs::last($rrd);
		# check for the error before doing arithmetic on the result
		$err = RRDs::error;
		die "RRDs::last($rrd): $err" if $err;
		$this_minute = $last + $rrdstep;
		# without this every log line would stat the file and call
		# RRDs::last again
		$rrd_inited = 1;
		return;
	}

#xff: The xfiles factor defines what part of a consolidation interval may be
# made up from *UNKNOWN* data while the consolidated value is still regarded
# as known.
#steps: defines how many of these primary data points are used to build a
# consolidated data point which then goes into the archive.
#rows: defines how many generations of data values are kept in an RRA.

	my $day_steps = 1;
	# fill one week of data ($row * $rrdstep sec)
	my $rows = (24 * 3600 * 7) / $rrdstep;
	my $realrows = int($rows * 1.1); # ensure that the full range is covered

	# use multiples, otherwise rrdtool could choose the wrong RRA
	my $week_steps = $day_steps * 7;
	my $month_steps = $week_steps * 5;
	my $year_steps = $month_steps * 12;

	RRDs::create($rrd, '--start', $m - $rrdstep, '--step', $rrdstep,
		# data source: DS:ds-name:DST:heartbeat:min:max
		(map { "DS:$_:ABSOLUTE:" . ($rrdstep * 2) . ':0:U' } @query_t),
		# RR archive:  RRA:CF:xff:steps:rows
		"RRA:AVERAGE:0.5:$day_steps:$realrows",		# day
		"RRA:AVERAGE:0.5:$week_steps:$realrows",	# week
		"RRA:AVERAGE:0.5:$month_steps:$realrows",	# month
		"RRA:AVERAGE:0.5:$year_steps:$realrows",	# year
		"RRA:MAX:0.5:$day_steps:$realrows",			# day
		"RRA:MAX:0.5:$week_steps:$realrows",		# week
		"RRA:MAX:0.5:$month_steps:$realrows",		# month
		"RRA:MAX:0.5:$year_steps:$realrows",		# year
	);
	$err = RRDs::error;
	die "RRDs::create($rrd, ...): $err" if $err;

	$this_minute = $m;

	$rrd_inited = 1;
}

# Detach from the terminal: change into the RRD directory, redirect the
# standard handles (STDOUT and STDERR go to the daemon log file in verbose
# mode, to /dev/null otherwise), fork, and start a new session in the child.
# The parent records the PID of the child in $daemon_pidfile, unless that is
# empty, and exits. Only the child returns from this sub.
# Dies when any of the steps fails.
sub daemonize() {
	chdir($daemon_rrd_dir)
		or die "bindgraph: can't chdir to $daemon_rrd_dir: $!";
	# we are inside the directory by now: testing '.' gives the right answer
	# for a relative $daemon_rrd_dir as well
	-w '.' or die "bindgraph: can't write to $daemon_rrd_dir\n";
	open(STDIN, '<', '/dev/null')
		or die "bindgraph: can't read /dev/null: $!";
	if ($verbose) {
		open(STDOUT, '>>', $daemon_logfile)
			or die "bindgraph: can't write to $daemon_logfile: $!";
	} else {
		open(STDOUT, '>', '/dev/null')
			or die "bindgraph: can't write to /dev/null: $!";
	}
	open(STDERR, '>&', \*STDOUT) or die "bindgraph: can't dup stdout: $!";

	my $pid = fork;
	die "bindgraph: can't fork: $!" if not defined $pid;

	if ($pid) {		# parent
		exit 0 if not $daemon_pidfile;

		open(my $pidfh, '>', $daemon_pidfile)
			or die "bindgraph: can't write to $daemon_pidfile: $!\n";
		print {$pidfh} "$pid\n"
			or die "bindgraph: can't write to $daemon_pidfile: $!\n";
		# buffered write errors only show up when the handle is closed
		close($pidfh)
			or die "bindgraph: can't write to $daemon_pidfile: $!\n";
		exit 0;
	}

	# child
	setsid() or die "bindgraph: can't start a new session: $!";
}

# Print the command line help on STDERR and exit with status 0.
# The defaults shown for the RRD directory and the log file are the current
# values of $daemon_rrd_dir and $daemon_logfile.
sub usage {
	print STDERR <<"EOM";
usage: bindgraph [options]

  -h, --help         display this help and exit
  -v, --verbose      be verbose about what you do
  -V, --version      output version information and exit
  -c, --cat          causes the logfile to be only read and not monitored
  -l, --logfile f    monitor logfile f instead of /var/log/syslog
  -y, --year         starting year of the log file (default: current year)
      --host=HOST    use only entries for HOST in syslog
      --ignore-localhost
                     ignore the queries sent by 127.0.0.1
  -d, --daemon       start in the background
      --format=TYPE  parse logs in TYPE format
  --daemon_pid=FILE  write the PID to FILE
  --daemon_rrd=DIR   write the RRD to DIR instead of $daemon_rrd_dir
  --rrd-name=NAME    use NAME.rrd for the RRD file instead
  --daemon_log=FILE  write verbose-log to FILE instead of $daemon_logfile

TYPE may be bind92 or bind93 (default).
EOM
	exit 0;
}

# vim: ts=4
