#! /usr/bin/perl -w
# vim:syntax=perl

use strict;
use lib '/usr/share/perl5';
use Lire::DlfSchema;
use Lire::Program qw( :msg :dlf );
use Time::Local;

#
# set to non-zero only in case of testing.  running this script in debug
# mode on production-size logs will fill your disk pretty fast.
#
my $debug = 0;
sub debug {
    $debug and lr_debug($_[0]);
}

my $lines	= 0;
my $dlflines	= 0;
my $errorlines  = 0;

my $schema = eval { Lire::DlfSchema::load_schema( "database" ); };
lr_err( "error loading database schema: $@" ) if $@;
my $dlf_maker =
  $schema->make_hashref2asciidlf_func(
    qw/time user remote_host action database query
       success result connection_id run_id/
  );

my $prevtime = "";
my $sqlquery = "";

my $runid;
my %sessions;
my %hosts;
my %databases;

my $query = {};
my @queries = ();

sub print_dlf
{
    my ($c, $print_only_x ) = @_;

    my $subroutine = 'print_dlf';

    my %dlf = map { $_ => $c->{$_} }
    qw/time user remote_host action database query success result connection_id/;
    foreach my $f ( qw/time user remote_host action database query success result connection_id/ ) {
	$dlf{$f} = $c->{$f};
    }
	my $dlf = $dlf_maker->( \%dlf );
	print join( " ", @$dlf ), "\n";
	$dlflines++;
}

sub new_command {
    debug("new command");

    # store previous query (if != null)
    if ($query->{"time"}) {
        debug("Storing line: $query");
        push(@queries, $query);
    }

    # empty temp
    $query = {};
}

sub parse_mysql_line {
    my ( $line ) = @_;

    debug("Parsing: $line");
    if ($line =~ /mysqld, Version:/i) {
	# skip startup info line
	$runid++;
    } elsif ($line =~ /^Tcp port/i) {
        # skip startup info line
        ;
    } elsif ($line =~ /^Time\s*Id\s*Command\s*Argument/) {
        # skip header line
        ;
    } else {
        my $query_part = 1;
        my $time = $prevtime;
        if ($line =~ s/^(\d\d)(\d\d)(\d\d)\s([\s|\d]\d):(\d\d):(\d\d)\s+//) {
            # new time stamp
            my ( $year, $month, $day, $hour, $min, $sec ) =
                ($1, $2, $3, $4, $5, $6);
            $time = timelocal( $sec, $min, $hour, $day, $month -1, $year );
            $prevtime = $time;
            # no continuation of query
            new_command();

            $query_part = 0;
            $sqlquery = "";
        } elsif ($line =~ /^\t\t\s*\d+ /) {
            # no continuation of query
            new_command();

            $query_part = 0;
            $sqlquery = "";
            $line =~ s/^\t\t\s*//;

            # this is a dirty hack to keep lines in order
            $time = ++$prevtime;
	}

        if ($query_part) {
            debug("adding query part");

	    $sqlquery .= $line;
            $query->{query} = $sqlquery;
	} else {
            debug($line);

            # parse line
            #
            # this will raise an error if $line is shorter than 8 chars
            # we're looking for char 9 .. 18

#            unless ($line =~ /^\d+[a-zA-Z ]{11}/) {
#                # when command is Quit, we have only 11 command-and-space
#                # chars, not 12
#
#                # skip these lines
#                die("expected command and session id, i.g. '54 Query'. " .
#                    "Found '$line'\n");
#            }

           $line =~ /^(\d+)\s+([A-Z][a-zA-Z ]*[a-zA-Z])\s+/ or die("expected command and session id, i.g. '54 Query'. Found '$line'\n");

           my ($session_id, $command) = ($1, $2);

	    # my $command = substr($line, 8, 10);
            $command =~ s/\s//g;
            $command = lc($command);
	    # my $session_id = substr($line, 0, 7);
            $session_id =~ s/\s//g;

            $query->{time} ||= $time;
            $query->{run_id} ||= $runid;
            $query->{action} ||= $command;
            $query->{connection_id} ||= $session_id;

            debug("time      : " . $time);
            debug("action    : " . $command);
            debug("session_id: " . $session_id);

            if ($command eq "connect") {
                # try to deduce the remote_host and user
		if ($line =~ /(\w*)\x40([\w|\.]*)/) {
                    # user and hostname found
                    my $user = $1;
                    my $remote_host = $2;

                    debug("user      : " . $user);
                    debug("remotehost: " . $remote_host);

                    $query->{user} ||= $user;
                    $query->{remote_host} ||= $remote_host;

                    # store user/host for other lines in this session
                    $sessions{$runid . "-" . $session_id} = $user;
                    $hosts{$runid . "-" . $session_id} = $remote_host;
                }
                # try to deduce success
                if ($line =~ /access denied/i) {
                    $query->{success} ||= "no";
                } else {
                    $query->{success} ||= "yes";
                }
            } else {
                $query->{user} ||= $sessions{$runid . "-" . $session_id};
                $query->{remote_host} ||= $hosts{$runid . "-" . $session_id};
                $query->{database} ||= $databases{$runid . "-" . $session_id};
            }

            $line =~ /^\d+\s+[a-zA-Z ]+\t([^\t]+)$/;
            if ($command eq "initdb") {
		# try to deduce the database name
		my $dbname = $1;
		# my $dbname = substr($line, 20);

                debug("database    : " . $dbname);
		$query->{database} ||= $dbname;
		$databases{$runid . "-" . $session_id} = $dbname;
            } elsif ($command eq "query") {
                # $sqlquery = substr($line, 20);
                $sqlquery = $1;
                $query->{query} = $sqlquery;
            }
        }
    }
}

init_dlf_converter( "database" );
$runid = 0;
while ( <> ) {
    lire_chomp;
    $lines++;

    eval {
        parse_mysql_line( $_ );
    };
    if ($@) {
        lr_warn( $@ );
        lr_warn( "failed to parse '$_'. Skipping." );
        $errorlines++;
    }
}

# now print all delayed messages not yet sent
foreach my $query (@queries) {
    print_dlf( $query );
}

end_dlf_converter( $lines, $dlflines, $errorlines );

__END__

=pod

=head1 NAME

mysql2dlf - convert mysql logfiles to dlf format

=head1 SYNOPSIS

B<mysql2dlf>

=head1 DESCRIPTION

B<mysql2dlf> converts a MySQL query log file to DLF format.  Information on
MySQL can be found on http://www.mysql.org/. The query log file is defined
by the --log[=file] option of B<mysqld>, or the log=file setting in MySQL
configuration file.

Such a log file looks like e.g.

 /usr/sbin/mysqld, Version: 3.23.43-debug-log, started with:
 Tcp port: 3306  Unix socket: /var/run/mysqld/mysqld.sock
 Time                 Id Command    Argument
 011218 15:02:58       1 Connect     root@localhost on
                       1 Query       SHOW VARIABLES LIKE 'pid_file'
                       1 Shutdown
 011226 21:35:59       6 Connect     joe@localhost on
                       6 Init DB     nmrshiftdb
                       6 Query       SHOW VARIABLES

or

                  42401 Query       SELECT
                  bannerID,
                  banner,
                  format,
                  width,
                  height,
                  alt,
                  url
              FROM
                  banners
              WHERE

                  act = 'true'  AND (keyw = 'fullsize' OR keyw = 'global')



The generic database dlf format is described in database.xml.

=head1 EXAMPLES

To process a log as produced by MySQL:

 $ mysql2dlf < mysql-log

mysql2dlf will be rarely used on its own, but is more likely
called by lr_log2report:

 $ lr_log2report mysql < /var/log/mysql-log

=head1 NOTES

The parser has been tested for query log files for MySQL 3.23.46 only.

=head1 BUGS

This converter stores all log files information in one big nested array in
memory, before writing.  Therefore, memory consumption is linear with the
size of the log.

=head1 SEE ALSO

mysql(1), mysqld(1)

=head1 VERSION

$Id: mysql2dlf.in,v 1.12 2010/12/05 20:36:45 wraay Exp $

=head1 COPYRIGHT

Copyright (C) 2001, 2002 Stichting LogReport Foundation LogReport@LogReport.org

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=head1 AUTHOR

Egon Willighagen <egonw@logreport.org>

=cut

# Local Variables:
# mode: cperl
# End:
