#!/usr/local/bin/perl
# 
# Copyright (c) 1999 Wolfram Schneider <wosch@FreeBSD.org>
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# 
# $Id
# 
# cvsup2httplog - convert a cvsup log file to a HTTP log file
#
# usage: cvsup2httplog < cvsupd.log > httpd.log
#

=pod

=head1 NAME

cvsup2httplog - convert a cvsup log file to a HTTP log file

=head1 SYNOPSIS

cvsup2httplog < cvsupd.log > httpd.log

=head1 DESCRIPTION

cvsup2httplog converts a cvsupd(1) log file to an httpd log file.
The output can be analyzed with standard HTTP log analyzer tools
like analog(1).

=head1 EXAMPLE

=head2 INPUT

  1998.12.26 14:05:22 CET [797]: +6 wosch@anonymous214.ppp.cs.tu-berlin.de (paula.panke.de.freebsd.org) [REL_15_3/15.4]
  1998.12.26 14:05:33 CET [797]: =6 [23Kin+2252Kout] src-sbin: No scan file found
  1998.12.26 14:05:33 CET [797]: =6  [23Kin+225Kout] src-secure: Found scan file /home/cvsup/cvsup/supd/cvs-crypto/checkouts.cvs
  1998.12.26 14:05:33 CET [797]: =6  [23Kin+252Kout] cvs-crypto: Found scan file /home/cvsup/cvsup/supd/cvs-crypto/checkouts.cvs
  1998.12.26 14:06:57 CET [797]: -6 TreeComp failed: Network read failure: Connection lost
  1999.01.21 15:04:55 CET [1935]: +3289 wosch@freefall.FreeBSD.ORG [REL_15_4_2/15.5]
  1999.01.21 15:04:57 CET [1935]: =3289 [300Kin+600Kout] openbsd-cvs-all: No scan file found
  1999.01.21 15:09:46 CET [1935]: -3289 Finished successfully

=head2 OUTPUT

  anonymous214.ppp.cs.tu-berlin.de - wosch [26/Dec/1998:14:05:33 +0000] "GET /src/sbin/index.html" 200 2329600 "REL_15_3/15.4"
  anonymous214.ppp.cs.tu-berlin.de - wosch [26/Dec/1998:14:05:33 +0000] "GET /src/secure/index.html" 200 253952 "REL_15_3/15.4"
  anonymous214.ppp.cs.tu-berlin.de - wosch [26/Dec/1998:14:05:33 +0000] "GET /cvs/crypto/index.html" 200 281600 "REL_15_3/15.4"
  freefall.FreeBSD.ORG - wosch [21/Jan/1999:15:04:57 +0000] "GET /openbsd/cvs/all/index.html" 200 921600 "REL_15_3/15.4"


=head1 PERFORMANCE

The script converts 500 KBytes/s of log data on a 300 MHz Pentium II.

=head1 SEE ALSO

cvsupd

http://www.freebsd.org/handbook/cvsup.html

=head1 AUTHOR

Wolfram Schneider <wosch@FreeBSD.org>, January 1999

=cut



use strict;

# Month abbreviations as used in HTTP log timestamps, indexed 0..11.
my (@MoY) = ('Jan','Feb','Mar','Apr','May','Jun',
             'Jul','Aug','Sep','Oct','Nov','Dec');

# Convert a cvsupd date stamp "YYYY.MM.DD" into the "DD/Mon/YYYY" form
# used in HTTP log files.
#
#   $date   - date token taken from a cvsupd log line
#
# Returns the converted string, or undef when $date contains no
# "YYYY.MM.DD" group made of digits or when the month is not in 01..12.
sub todate {
    my $date = shift;

    if (defined $date && $date =~ m%([0-9]{4})\.([0-9]{2})\.([0-9]{2})%) {
        my ($year, $month, $day) = ($1, $2, $3);

        # Without the range check month "00" would index $MoY[-1] and be
        # reported as December, and "13" would yield an empty month name.
        if ($month >= 1 && $month <= 12) {
            return $day . '/' . $MoY[$month - 1] . '/' . $year;
        }
    }

    # Explicit undef so that list-context callers still receive exactly
    # one element.
    return undef;
}

# Write one HTTP-style access log line to STDOUT:
#
#   host localhost user [DD/Mon/YYYY:time offset] "GET /collection/index.html" code bytes "version"
#
#   $host, $localhost, $user - client identification; an empty or false
#                              $localhost or $user is printed as '-'
#   $date, $time             - cvsupd date ("YYYY.MM.DD") and time stamp
#   $isdst                   - offset string placed after the time
#   $collection              - collection name, used as the request path
#   $code                    - status code to report
#   $traffic                 - transferred amount in KBytes (printed in bytes)
#   $version                 - client version string, printed quoted
sub httpdlog {
    my ($host, $localhost, $user, $date, $time, $isdst,
        $collection, $code, $traffic, $version) = @_;

    my $ident = $localhost || '-';
    my $login = $user      || '-';
    my $bytes = $traffic * 1024;

    print join('',
               $host, ' ', $ident, ' ', $login,
               ' [', todate($date), ':', $time, ' ', $isdst, '] ',
               '"GET /', $collection, '/index.html" ',
               $code, ' ', $bytes,
               ' "', $version, '"', "\n");
}

use Time::Local qw(timegm);

# Session table, keyed by the numeric session id from the log.
my %id;
# File-level scratch variables describing the most recently seen client.
my ($user, $host, $localhost, $version);

# Return the offset of local time from UTC at epoch time $when, formatted
# as "+hhmm" or "-hhmm".
#
# The local broken-down time is re-read as if it were UTC; the difference
# to $when is the zone offset in seconds.  Unlike a plain difference of the
# hour fields this stays correct when local time and UTC fall on different
# calendar days, and it keeps offsets that are not whole hours.
sub utc_offset {
    my $when = shift;

    my @local = localtime($when);
    $local[5] += 1900;          # full year: avoids two-digit year guessing
    my $seconds = timegm(@local[0 .. 5]) - $when;

    my $sign    = $seconds < 0 ? '-' : '+';
    my $minutes = int(abs($seconds) / 60);

    return sprintf('%s%02d%02d', $sign, int($minutes / 60), $minutes % 60);
}

# find local time offset to UTC
# (despite its name, $isdst holds the "+hhmm" offset string)
my $isdst = utc_offset(time);

# Main loop: read cvsupd log lines and print one HTTP-style log line per
# collection transfer and per failed authentication.
#
# After the date, time, zone and "[pid]:" fields a line is dispatched on
# its fifth field:
#
#   +N user@host ...        session N starts; the client is remembered
#   =N [xKin+yKout] name:   a collection was handled in session N
#   -N ...                  session N is finished and forgotten
#   Authentication failed:  logged as collection "forbidden", status 403
#
# Client details are kept per session id in %id, so sessions whose lines
# are interleaved in the log are each reported with their own user, host
# and version instead of those of whichever client connected last.
while (my $line = <>) {
    my ($date, $time, $tz, $pid, $flag, $kb, $collection, @rest)
        = split ' ', $line;

    # too short to be any of the records handled below
    next unless defined $flag;

    # new session
    if ($flag =~ /^\+([0-9]+)/) {
        my $session = $1;
        next unless defined $kb;

        my ($user, $host) = split('@', $kb);
        my ($localhost, $version);
        my @fields = grep { defined $_ } ($collection, @rest);

        if (@fields &&
            ($fields[0] =~ /^\((.*)\)$/ ||  # local host name
             $fields[0] =~ /^<(.*)>$/))     # authenticated
        {
            $localhost = $host;
            ($host = $1) =~ s/.*@//;
            shift @fields;
        }

        # The bracketed version follows the optional host field, so it is
        # not always at the same position.
        foreach my $field (@fields) {
            if ($field =~ /^\[(.*)\]$/) {
                $version = $1;
                last;
            }
        }

        $id{$session} = {
            'user'      => $user,
            'host'      => $host,
            'localhost' => $localhost,
            'version'   => $version,
        };
    }

    # collection handled within a session
    elsif ($flag =~ /^=([0-9]+)/) {
        my $session = $1;
        my $client  = $id{$session};
        my $first   = defined $rest[0] ? $rest[0] : '';

        $collection = '' unless defined $collection;

        # no trailing colon
        $collection =~ s/:$//;

        # replace '-' with '/', which looks more hierarchical
        $collection =~ s%-%/%g;

        my $code = 200;
        if ($collection eq 'Unknown' && $first eq 'collection') {
            $code = 404;
            $collection = defined $rest[1] ? $rest[1] : '';
            $collection =~ s/\"//g;
        }
        $code = 408 if $first eq 'TreeComp';
        $code = 406 if $first eq 'Cannot';

        if (defined $client) {
            my $traffic = 0;
            if (defined $kb && $kb =~ /^\[([0-9]+)Kin\+([0-9]+)Kout\]/) {
                $traffic = $1 + $2;
            }

            httpdlog($client->{'host'}, $client->{'localhost'},
                     $client->{'user'}, $date, $time, $isdst,
                     $collection, $code, $traffic, $client->{'version'});
        } else {
            warn "Unknown session: $session\n";
        }
    }

    # session finished
    elsif ($flag =~ /^-([0-9]+)/) {
        my $session = $1;
        if (defined $id{$session}) {
            # delete rather than undef, so the table does not grow with
            # every session ever seen
            delete $id{$session};
        } else {
            warn "Unknown session finished: $session\n";
        }
    }

    elsif ($flag eq 'Authentication' && defined $kb && $kb eq 'failed:') {
        my $who = defined $collection ? $collection : '';
        my ($user, $host) = split('@', $who);

        # Lexical on purpose: a rejected client must not change what is
        # reported for running sessions.
        my ($localhost, $version);

        # NOTE(review): as before, the wrapper check is applied to the
        # user@host token itself -- confirm against real cvsupd output.
        if ($who =~ /^\((.*)\)$/ ||     # local host name
            $who =~ /^<(.*)>$/)         # authenticated
        {
            $localhost = $host;
            ($host = $1) =~ s/.*@//;
        }

        if (defined $rest[0] && $rest[0] =~ /^\[(.*)\]$/) {
            $version = $1;
        }

        httpdlog($host, $localhost, $user, $date, $time, $isdst,
                 'forbidden', 403, 0, $version);
    }
}
    
