# BETA! $Id$
####################
# w3c2isa.pl by Shawn Quillman - squillman@att.net, adapted by Peter STEVENINCK to convert ISA logfiles for awstats
# Converts ISA Format to W3C Extended Log Format
# Provided as is with no warranty or guarantee
# Usage: w3c2isa.pl [-qv] [-h count]  -i inputfile [-o outputfile]
# Options:
#	-q              quiet mode, no output
#       -v              overwrite existing output file without confirming
#	-h              print hash marks (#) to monitor progress, one mark every
#			<count> lines
#    
#       -i inputfile    name of W3C Extended Log Format input file.  If
#                       inputfile is a directory all files
#                       in the directory will be processed and output files
#                       will be saved in that directory with a
#                       prefix of ISA-.
#       -o outputfile   name of ISA Log Format output file.  If not specified
#                       the output file will be saved in the same
#                       directory as inputfile with a prefix of ISA-.
#                       Ignored if inputfile is a directory.
####################
#IMPORTANT: w3c date format in one field, CS-METHOD, CS-REFERER are compulsory fields
# IP address of the web server as it is logged in the ISA cs-uri field.
# doConvert removes "http://<this address>" from each URI before writing it.
$webserverIP="123.123.123.123";

####################
# Check for appropriate number of command line arguments.
# Exit if incorrect
####################
#@ARGV=("-i","IN.LOG","-o","OUT.LOG");
$numargs=@ARGV;

# The shortest valid call is "-i inputfile" (2 arguments); the longest is
# "-q -v -h count -t offset -i in -o out -host list" (12 arguments).
if (($numargs < 2) || ($numargs > 12)) {
    $msg=join("",
        "\nUsage: isa2w3c.pl [-qv] [-h count] -t GMT-offset -i inputfile [-o outputfile]\n",
        "Options:\n",
        "\t-q\t\tquiet mode, no output\n",
        "\t-v\t\toverwrite existing output file without confirming\n",
        "\t-h\t\tprint hash marks to monitor progress, one mark every\n",
        "\t\t\t<count> lines\n",
        "\t-i inputfile\tname of W3C Extended Log Format input file.  If\n",
        "\t\t\tinputfile is a directory all files\n",
        "\t\t\tin the directory will be processed and output files\n",
        "\t\t\twill be saved in that directory with a\n",
        "\t\t\tprefix of AWSTATS-.\n",
        "\t-o outputfile\tname of ISA LOG Format output file.  If not specified\n",
        "\t\t\tthe output file will be saved in the same\n",
        "\t\t\tdirectory as inputfile with a prefix of AWSTATS-.\n",
        "\t\t\tIgnored if inputfile is a directory.\n",
        "\t-host host for awstats.[host].conf\n\t\tseparate them by ','\n",
    );

    die($msg);
}


####################
# Parse command line arguments
# Exit if an argument is not valid
####################
$offsetflag=0;			# set to 1 when offset arg is found
$inputfileflag=0;		# set to 1 when inputfile arg is found
$outputfileflag=0;		# set to 1 when outputfile arg is found
$hashflag=0;			# set to 1 when hash mark arg is found
$hostconfigflag=0;		# set to 1 when host arg is found
$quiet=0;			# set to 1 if q option is specified
$hashon=0;			# set to 1 if h option is specified
$hashcount=0;			# set to <count> when hash option is specified
$overwrite=0;			# set to 1 if v option is specified
$inputfile="";
$outputfile="";
$hostconfigfilelist="";

foreach my $arg (@ARGV) {
    # An option that takes a value was seen last time round: this argument
    # is that value, whatever it looks like.
    if ($offsetflag) {
        $offset=$arg;
        $offsetflag=0;
    } elsif ($inputfileflag) {
        $inputfile=$arg;
        $inputfileflag=0;
    } elsif ($outputfileflag) {
        $outputfile=$arg;
        $outputfileflag=0;
    } elsif ($hostconfigflag) {
        $hostconfigfilelist=$arg;
        $hostconfigflag=0;
    } elsif ($hashflag) {
        $hashon=1;
        $hashcount=$arg;
        $hashflag=0;
    } elsif ($arg eq "-h") {
        $hashflag=1;
    } elsif ($arg eq "-t") {
        $offsetflag=1;
    } elsif ($arg eq "-i") {
        $inputfileflag=1;
    } elsif ($arg eq "-o") {
        $outputfileflag=1;
    } elsif ($arg eq "-host") {
        $hostconfigflag=1;
    } elsif ($arg =~ /^-[qv]+$/) {
        # Accept -q, -v and the combined forms -qv / -vq.  The pattern is
        # anchored so unrelated arguments that merely contain a "q" or "v"
        # are rejected instead of being taken for a flag.
        $quiet=1 if ($arg =~ /q/);
        $overwrite=1 if ($arg =~ /v/);
    } else {
        die("\nInvalid argument: ",$arg,"\n");
    }
}

# The count is used as a modulus by doConvert, so it has to be a positive
# integer.
if ($hashon && !(($hashcount =~ /^\d+$/) && ($hashcount > 0))) {
    die("\nInvalid hash mark count: ",$hashcount,"\n");
}

#
# splitting in different configfiles
@hostconfigfile = split(",",$hostconfigfilelist);
if (!$quiet) {
    foreach my $hostname (@hostconfigfile) {
        print($hostname."\n > hosts.log \n");
    }
}

####################
# Confirm existence of input file
# Exit if not found
####################
if (!(-e $inputfile)) {
    die("\nCannot find input file or directory\n");
}

####################
# Examine input file
# If it's a directory get the list of contents
# Otherwise create a single item list
####################
if (-d $inputfile) {
    opendir (INDIR,$inputfile) or die("\nCannot read directory ",$inputfile,": ",$!,"\n");
    @infiles=readdir(INDIR);
    closedir(INDIR);
} else {
    @infiles=($inputfile);
    if ($outputfile eq "") {
        # Default output name: same file name inside an "isa" subdirectory
        # of the input file's directory.  Both \ and / are accepted as path
        # separators.  The prefix goes on the LAST path element; indexing
        # one past it would append "isa/" after the file name.
        @path = split(/[\\\/]/,$inputfile);
        $path[-1]="isa\/".$path[-1];
        $outputfile=join("\/",@path);
        print ("Output:$outputfile") if (!$quiet);
    }
}
if (!$quiet) {
    foreach my $listed (@infiles) {
        print($listed."\n");
    }
}

####################
# Convert each file in the list
####################
foreach my $entry (@infiles) {				# loop through the file list
    my $infile=$entry;
    if (-d $inputfile) {
        next if (($entry eq ".") || ($entry eq ".."));	# ignore . and ..
        $outputfile="AWSTATS\/".$entry;			# don't have an output file name yet since the input file was
        $infile=$inputfile."\/".$entry;			# a directory, so make one for each file in the directory
    }
    next if (-d $infile);				# ignore any subdirectories

    if (!$quiet) {
        print("Converting ",$infile,"\n");
        print("Saving as ",$outputfile,"\n");
    }

    # Confirm overwrite of an existing output file unless -v was given.
    # The reply is kept in its own variable: storing it in $overwrite would
    # make every reply (even "n") switch confirmation off for later files.
    if ((!$overwrite) && (-e $outputfile)) {
        print("\nOutput file ",$outputfile," exists, overwrite (y/n)? ");
        my $answer=<STDIN>;
        $answer="" unless defined($answer);		# STDIN closed: treated like an empty reply
        $answer =~ s/\r?\n$//;
        if (($answer eq "n") || ($answer eq "N")) {
            print ("Skipping...\n") if (!$quiet);
            next;
        }
        print ("\n");
    }

    doConvert($infile,$outputfile);			# call sub that will perform the conversion
}


####################
# Begin conversion of file
####################
####################
# doConvert(inputfile, outputfile)
#
# Reads the ISA log file <inputfile> and writes one converted line per data
# record to <outputfile>, then runs awstats.pl on the result once for every
# entry of @hostconfigfile.
#
# Field names are taken from the "#Fields:" header line (tab separated);
# every other line starting with "#" is skipped.
#
# Globals read: $webserverIP, $quiet, $hashon, $hashcount, @hostconfigfile.
#
# Returns the number of records converted, or nothing when either file
# cannot be opened (a warning is printed in that case).
####################
sub doConvert {

    my($inputfile,$outputfile)=@_;

    if (!$quiet) {
        print("inputfile:$inputfile \n");
        print("outputfile:$outputfile \n");
    }

    # Open the input first so an unreadable input does not truncate an
    # existing output file.
    my ($in,$out);
    if (!open($in,"<",$inputfile)) {
        warn("Cannot open input file $inputfile: $!\n");
        return;
    }
    if (!open($out,">",$outputfile)) {
        warn("Cannot create output file $outputfile: $!\n");
        close($in);
        return;
    }

    # targetfields for w3c
    my @targetfields=("date","c-ip","cs-username","cs-method","cs-uri-stem","sc-status","sc-bytes","cs-version","cs(User-Agent)","cs(Referer)");

    my $starttime=scalar(gmtime);
    my $header="# Software: isa2w3c.pl\n# Version: 1.0\n#Date: 2002-09-04 00:00:14\n# Started at:$starttime\n# ";
    $header.=join("",map { $_."\t" } @targetfields);
    print {$out} $header,"\n";

    print("we are reading the file\n") if (!$quiet);

    # int() keeps a missing or non-numeric count from reaching the modulus.
    my $hashevery=($hashon ? int($hashcount || 0) : 0);

    my @fieldkeys;				# ISA field names, in logged order
    my $linecount=0;
    while (defined(my $line=<$in>)) {
        $line =~ s/\r?\n$//;			# strip the line ending so the last field/key is clean

        if ($line =~ /^#/) {
            # Header line: only "#Fields:" carries information we need.
            if ($line =~ /^#Fields:\s*(.*)$/) {
                @fieldkeys=split(/\t/,$1);
                if (!$quiet) {
                    print("loggedfield in ISA=",$_,"\n") foreach (@fieldkeys);
                }
            }
            next;
        }
        next if ($line eq "");

        # Fresh record for every line so nothing leaks from the previous
        # one; fields missing from the log stay empty.
        my %isalogline=map { $_ => "" } ("date","time","c-ip","cs-username","s-operation","r-host","cs-uri","sc-status","sc-bytes","c-agent");
        my @values=split(/\t/,$line);
        for my $i (0..$#values) {
            last if ($i > $#fieldkeys);
            $isalogline{$fieldkeys[$i]}=$values[$i];
        }

        # cs-uri-stem in w3c is r-host combined with a part of cs-uri.
        # ISA logs r-host and the uri separately, so the web server IP is
        # cut out of the uri.  \Q..\E keeps the dots of the address literal.
        my $csuristem=$isalogline{"r-host"}.$isalogline{"cs-uri"};
        $csuristem =~ s/http:\/\/\Q$webserverIP\E//g;

        my $w3cline=join("\t",
            $isalogline{"date"}." ".$isalogline{"time"},
            $isalogline{"c-ip"},
            $isalogline{"cs-username"},
            $isalogline{"s-operation"},		# cs-method in W3C is s-operation in ISA
            "http://".$csuristem,
            $isalogline{"sc-status"},
            $isalogline{"sc-bytes"},
            "HTTP/1.1",
            $isalogline{"c-agent"},
            "UNKNOWN",
        );
        print {$out} $w3cline."\t\t\n";
        $linecount++;

        # print hash character (#) if requested
        if (($hashevery > 0) && ($linecount % $hashevery == 0)) {
            print ("#");
        }
    }

    my $endtime=scalar(gmtime);
    print {$out} "# ended at: $endtime      in sec";

    # Close (and thereby flush) the output BEFORE awstats reads it.
    close($in);
    close($out) or warn("Error while closing $outputfile: $!\n");

    print ("export $outputfile \n") if (!$quiet);

    # execution of awstats.pl
    # NOTE(review): the command goes through the shell with the file and
    # config names interpolated; names containing shell metacharacters are
    # not escaped.
    foreach my $hostconfig (@hostconfigfile) {
        my $logname=$hostconfig.$outputfile.".log";
        $logname=~ s/\///g;
        if (!$quiet) {
            print("perl awstats.pl -config=$hostconfig -logfile=$outputfile -update  > ".$logname." \n******\n");
        }
        system("perl awstats.pl -config=$hostconfig -logfile=$outputfile -update > ".$logname);
    }

    if (!$quiet) {
        print("Lines: $linecount \nEND\n");
        print ("\n",$linecount," lines processed \n");		# print total lines processed for the file
    }

    return $linecount;
}
        


####################
# time_diff(t1, t2)
#
# Returns the number of seconds from t1 to t2.  Both arguments are strings
# containing a whitespace-delimited hh:mm:ss time (the form produced by
# gmtime in scalar context).  Only the minutes and seconds are compared, so
# the result is always in the range 0..3599; a negative difference is taken
# to mean the hour rolled over and 3600 is added.
#
# Returns nothing (undef in scalar context) when either argument does not
# contain such a time.
####################
sub time_diff{
    my($t1,$t2) = @_;

    my @seconds;
    foreach my $stamp ($t1,$t2) {
        # Check the match before touching $1/$2: after a failed match they
        # still hold the captures of the previous successful one.
        return unless (defined($stamp) && ($stamp =~ /^.*\s\d{2,2}:(\d{2,2}):(\d{2,2})\s.*$/));
        push(@seconds, 60*$1 + $2);
    }

    my $diff = $seconds[1] - $seconds[0];
    $diff = $diff + 3600 unless ($diff >= 0);

    return $diff;
}
