(file) Return to logresolvemerge.pl CVS log (file) (dir) Up to [RizwankCVS] / geekymedia_web / awstats-6.3 / tools

  1 rizwank 1.1 #!/usr/bin/perl
  2             #-----------------------------------------------------------------------------
  3             # Allows you to get one unique output log file, sorted on date,
  4             # built from particular sources.
  5             # This tool is part of AWStats log analyzer but can be use
  6             # alone for any other log analyzer.
  7             # See COPYING.TXT file about AWStats GNU General Public License.
  8             #-----------------------------------------------------------------------------
  9             # $Revision: 1.31 $ - $Author: eldy $ - $Date: 2004/11/27 14:32:00 $
 10             
 11             use strict; no strict "refs";
 12             #use diagnostics;
 13             
 14             #-----------------------------------------------------------------------------
 15             # Defines
 16             #-----------------------------------------------------------------------------
 17             
 # ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
 # Non-threaded state used by the DNS lookup code.
 my $UseThread = 0;
 &Check_Thread_Use();
 my $NbOfDNSLookupAsked = 0;	# Incremented on each MakeDNSLookup call
 my %threadarray = ();			# Hosts with a lookup in progress (entry deleted by MakeDNSLookup)
 my %MyDNSTable = ();			# ip => name, loaded from the DNS cache file
 my %TmpDNSLookup = ();			# ip => name found during this run ('*' when not resolved)

 # ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
 #my $UseThread=1;
 #&Check_Thread_Use();
 #my $NbOfDNSLookupAsked : shared = 0;
 #my %threadarray : shared = ();
 #my %MyDNSTable : shared = ();
 #my %TmpDNSLookup : shared = ();


 # ---------- Init variables --------
 # $REVISION is extracted from the CVS keyword, $VERSION is built from it.
 use vars qw/ $REVISION $VERSION /;
 ($REVISION) = ('$Revision: 1.31 $' =~ /\s(.*)\s/);
 $VERSION = "1.2 (build $REVISION)";

 # Number of lines between two benchmark messages (-showsteps)
 use vars qw/ $NBOFLINESFORBENCHMARK /;
 $NBOFLINESFORBENCHMARK = 8192;

 use vars qw/
 	$DIR $PROG $Extension
 	$Debug $ShowSteps $AddFileNum
 	$MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
 	$NbOfLinesShowsteps $AFINET $QueueCursor
 /;
 # String settings start empty, numeric settings and counters start at 0.
 # ($QueueCursor is set just before the main loop.)
 $DIR = $PROG = $Extension = '';
 $DNSCache = $DirCgi = $DirData = '';
 $AFINET = '';
 $Debug = $ShowSteps = $AddFileNum = 0;
 $MaxNbOfThread = $DNSLookup = $DNSLookupAlreadyDone = 0;
 $NbOfLinesShowsteps = 0;

 # ---------- Init arrays --------
 use vars qw/ @SkipDNSLookupFor @ParamFile /;

 # ---------- Init hash arrays --------
 use vars qw/
 	%linerecord %timerecord %corrupted
 	%QueueHostsToResolve %QueueRecords
 /;
 %linerecord = ();
 %timerecord = ();
 %corrupted = ();
 %QueueHostsToResolve = ();
 %QueueRecords = ();

 # ---------- External Program variables ----------
 # Command used to read a compressed file and pattern telling which files need it.
 # For gzip compression
 my ($zcat, $zcat_file) = ('zcat', '\.gz$');
 # For bz2 compression
 my ($bzcat, $bzcat_file) = ('bzcat', '\.bz2$');
 84             
 85 rizwank 1.1 
 86             
 87             #-----------------------------------------------------------------------------
 88             # Functions
 89             #-----------------------------------------------------------------------------
 90             
 #------------------------------------------------------------------------------
 # Function:		Write an error message on STDERR and exit with status 1
 # Parameters:	$message
 # Input:		None
 # Output:		None
 # Return:		None (the process exits)
 #------------------------------------------------------------------------------
 sub error {
 	my $message = $_[0];
 	# STDOUT carries the resulting log records, so the error text must not be
 	# written there or it would be mixed with the records.
 	print STDERR "Error: $message.\n";
 	exit 1;
 }
102             
#------------------------------------------------------------------------------
# Function:		Write a debug message when $Debug is at least the message level
# Parameters:	$message [$level]   ($level is 1 when omitted or 0)
# Input:		$Debug
# Output:		None
# Return:		None
#------------------------------------------------------------------------------
sub debug {
	my ($debugstring, $level) = @_;
	$level ||= 1;
	# Message is more detailed than what was asked for: nothing to print
	return if ($Debug < $level);
	print "DEBUG $level - ".localtime(time())." : $debugstring\n";
}
117             
#------------------------------------------------------------------------------
# Function:		Write a warning message (also sent to debug() when $Debug is set)
# Parameters:	$message
# Input:		$Debug
# Output:		None
# Return:		None
#------------------------------------------------------------------------------
sub warning {
	my ($messagestring) = @_;
	debug("$messagestring",1) if ($Debug);
	print "$messagestring\n";
}
130             
#-----------------------------------------------------------------------------
# Function:     Return 1 if string is not empty and contains only allowed chars:
#               alphanum chars (and _) or + - / \ . % , ; : = " ' & ? ! and spaces
# Input:        String
# Return:       0 or 1
#-----------------------------------------------------------------------------
sub IsAscii {
	my ($string) = @_;
	debug("IsAscii($string)",5) if ($Debug);
	my $onlyallowedchars = ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) ? 1 : 0;
	debug(($onlyallowedchars ? " Yes" : " No"),5) if ($Debug);
	return $onlyallowedchars;
}
146             
#-----------------------------------------------------------------------------
# Function:     Return 1 if host matches (case insensitive) one of the regex
#               stored in @SkipDNSLookupFor
# Input:        Host, @SkipDNSLookupFor
# Return:       0 or 1
#-----------------------------------------------------------------------------
sub SkipDNSLookup {
	my $host = $_[0];
	foreach my $pattern (@SkipDNSLookupFor) {
		return 1 if ($host =~ /$pattern/i);
	}
	return 0;	# Not in @SkipDNSLookupFor
}
156             
#-----------------------------------------------------------------------------
# Function:     Make the reverse DNS lookup of an IPv4 address (can be run in
#               a thread)
# Input:        IP address (dotted string), $MaxNbOfThread, $Debug
# Output:       $TmpDNSLookup{ip} is set to the host name found, or to '*' if
#               lookup gave nothing, gave an IP address or a name refused by
#               IsAscii. $threadarray{ip} is removed. $NbOfDNSLookupAsked++
# Return:       None
#-----------------------------------------------------------------------------
sub MakeDNSLookup {
	my ($ipaddress) = @_;
	$NbOfDNSLookupAsked++;
	use Socket; $AFINET=AF_INET;
	# Thread id is only asked when threads are used (0 otherwise)
	my $tid=0;
	if ($MaxNbOfThread) { $tid=eval("threads->self->tid()"); }
	else { $tid=0; }
	debug("  ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4) if ($Debug);
	my @octets=split(/\./,$ipaddress);
	my $lookupresult=gethostbyaddr(pack("C4",@octets),$AFINET);	# This is very slow, may took 20 seconds
	my $resolved=1;
	if (! $lookupresult) { $resolved=0; }
	elsif ($lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/) { $resolved=0; }
	elsif (! IsAscii($lookupresult)) { $resolved=0; }
	$TmpDNSLookup{$ipaddress}=($resolved ? $lookupresult : '*');
	debug("  ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4) if ($Debug);
	# Tell main loop that no lookup is running anymore for this address
	delete $threadarray{$ipaddress};
	return;
}
180             
#-----------------------------------------------------------------------------
# Function:     Print, in queue order, all records at head of queue whose host
#               is resolved, and remove them from the queue. Stops on first
#               record whose host is not yet in %MyDNSTable nor %TmpDNSLookup.
# Parameters:   $logfilechosen   Number written before each record if $AddFileNum
# Input:        $QueueCursor %QueueHostsToResolve %QueueRecords
#               %MyDNSTable %TmpDNSLookup $AddFileNum $Debug
# Output:       $QueueCursor %QueueHostsToResolve %QueueRecords
# Return:       0
#-----------------------------------------------------------------------------
sub WriteRecordsReadyInQueue {
	my $logfilechosen=shift;
	if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
	while (my $host=$QueueHostsToResolve{$QueueCursor}) {
		if ($host eq '*') {
			# '*' means there is nothing to resolve in this record
			if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
		}
		else {
			# Static cache is checked first, then lookups made during this run
			my ($resolved,$source);
			if ($MyDNSTable{$host}) { $resolved=$MyDNSTable{$host}; $source='MyDNSTable'; }
			elsif ($TmpDNSLookup{$host}) { $resolved=$TmpDNSLookup{$host}; $source='TmpDNSLookup'; }
			else { last; }		# Head of queue is not resolved yet
			if ($resolved ne '*') {
				# \Q..\E: host must be replaced as a literal string. Without it, the
				# dots of an IP address match any char and another part of the record
				# (ie "1020304" for host 1.2.3.4) could be replaced instead.
				$QueueRecords{$QueueCursor}=~s/\Q$host\E/$resolved/;
				if ($Debug) { debug(" First elem in queue has been resolved (found in $source $resolved). We pull it.",4); }
			}
		}
		# Record is ready, we output it.
		if ($AddFileNum) { print "$logfilechosen $QueueRecords{$QueueCursor}\n"; }
		else { print "$QueueRecords{$QueueCursor}\n"; }
		delete $QueueRecords{$QueueCursor};
		delete $QueueHostsToResolve{$QueueCursor};
		$QueueCursor++;
	}
	return 0;
}
217             
#-----------------------------------------------------------------------------
# Function:     If Perl is 5.8+ and a -dnslookup:n or -dnslookup=n option is
#               found in @ARGV, load threads modules when $UseThread is set,
#               or stop with an error when it is not
# Input:        @ARGV $UseThread
# Return:       -
#-----------------------------------------------------------------------------
sub Check_Thread_Use {
	return if ($] < 5.008);
	foreach my $arg (@ARGV) {
		next unless ($arg =~ /^-dnslookup[:=](\d{1,2})/i);
		if (! $UseThread) {
			&error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading");
		}
		else {
			if (!eval ('require "threads.pm";')) { &error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":"")); }
			if (!eval ('require "threads/shared.pm";')) { &error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":"")); }
		}
	}
}
233             
234             
235             #-----------------------------------------------------------------------------
236             # MAIN
237             #-----------------------------------------------------------------------------
# Split path of script into directory ($DIR), name ($PROG) and extension ($Extension)
($DIR=$0) =~ s/([^\/\\]*)$//; ($PROG=$1) =~ s/\.([^\.]*)$//; $Extension=$1;

# Get parameters. Arguments starting with "-" are options, all others are
# log file names (or wildcard patterns) stored in @ParamFile.
my $cpt=1;
foreach my $arg (@ARGV) {
	if ($arg =~ /^-/) {
		if ($arg =~ /debug=(\d)/i) { $Debug=$1; }
		# Option name is removed with /i, like the test that detects it, so
		# "-DNSCache=file" gives "file" and not the whole argument.
		elsif ($arg =~ /dnscache=/i) { $DNSLookup||=2; $DNSCache=$arg; $DNSCache =~ s/-dnscache=//i; }
		elsif ($arg =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
		elsif ($arg =~ /dnslookup/i) { $DNSLookup||=1; }
		elsif ($arg =~ /showsteps/i) { $ShowSteps=1; }
		elsif ($arg =~ /addfilenum/i) { $AddFileNum=1; }
		else { print "Unknown argument $arg ignored\n"; }
	}
	else {
		push @ParamFile, $arg;
		$cpt++;
	}
}
# In debug mode, output is not buffered and startup settings are shown
if ($Debug) {
	$|=1;
	debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
	debug("DNSLookup=$DNSLookup");
	debug("DNSCache=$DNSCache");
	debug("MaxNbOfThread=$MaxNbOfThread");
}

# Disallow MaxNbOfThread and Perl < 5.8
error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead") if ($] < 5.008 && $MaxNbOfThread);

# Warning, there is a memory hole in ActiveState perl version (in delete functions)
if ($^X =~ /activestate/i || $^X =~ /activeperl/i) {
	# TODO Add a warning

}
276             
# No log file given on command line: show help screen on standard output and exit
if (scalar @ParamFile == 0) {
	print <<"END_OF_USAGE";
----- $PROG $VERSION (c) Laurent Destailleur -----
$PROG allows you to get one unique output log file, sorted on date,
built from particular sources:
 - It can read several input log files,
 - It can read .gz/.bz2 log files,
 - It can also make a fast reverse DNS lookup to replace
   all IP addresses into host names in resulting log file.
$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software
distributed with a GNU General Public License (See COPYING.txt file).
$PROG is part of AWStats but can be used alone as a log merger
or resolver before using any other log analyzer.

Usage:
  $PROG.$Extension [options] file
  $PROG.$Extension [options] file1 ... filen
  $PROG.$Extension [options] *.*
  perl $PROG.$Extension [options] *.* > newfile
Options:
  -dnslookup     make a reverse DNS lookup on IP addresses
  -dnslookup=n   same with a n parallel threads instead of serial requests
  -dnscache=file make DNS lookup from cache file first before network lookup
  -showsteps     print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines
  -addfilenum    if used with several files, file number can be added in first
                 field of output file. This can be used to add a cluster id
                 when log files come from several load balanced computers.

This runs $PROG in command line to open one or several
server log files to merge them (sorted on date) and/or to make a reverse
DNS lookup (if asked). The result log file is sent on standard output.
Note: $PROG is not a 'sort' tool to sort one file. It's a
software able to output sorted log records (with a reverse DNS lookup
included or not) even if log records are dispatched in several files.
Each of those files must be already independently sorted itself
(but that is the case in all web server log files). So you can use it
for load balanced log files or to group several old log files.

Don't forget that the main goal of logresolvemerge is to send log records to
a log analyzer in a sorted order without merging files on disk (NO NEED
OF DISK SPACE AT ALL) and without loading files into memory (NO NEED
OF MORE MEMORY). Choice of output records is done on the fly.

So logresolvemerge is particularly useful when you want to output several
and/or large log files in a fast process, with no use of disk or
more memory, and in a chronological order through a pipe (to be used by a log
analyzer).

Note: If input records are not 'exactly' sorted but 'nearly' sorted (this
occurs with heavy servers), this is not a problem, the output will also
be 'nearly' sorted but a few log analyzers (like AWStats) know how to deal
with such logs.

WARNING: If log files are old MAC text files (lines ended with CR char), you
can't run this tool on Win or Unix platforms.

WARNING: Because of important memory holes in ActiveState Perl version, use
another Perl interpreter if you need to process large log files.

Now supports/detects:
  Automatic detection of log format
  Files can be .gz/.bz2 files if zcat/bzcat tools are available in PATH.
  Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.
New versions and FAQ at http://awstats.sourceforge.net
END_OF_USAGE
	exit 0;
}
343             
# Get current time. All fields except year are formatted on 2 digits.
my $nowtime=time;
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
# localtime always gives year-1900 (ie 104 for 2004, 99 for 1999), so 1900 is
# added in all cases.
$nowyear+=1900;
my $nowsmallyear=$nowyear;$nowsmallyear =~ s/^..//;
$nowmonth++;		# localtime gives month from 0 to 11
foreach my $field ($nowmonth,$nowday,$nowhour,$nowmin,$nowsec) { $field=sprintf("%02d",$field); }
# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
$tomorrowyear+=1900;
my $tomorrowsmallyear=$tomorrowyear;$tomorrowsmallyear =~ s/^..//;
$tomorrowmonth++;
foreach my $field ($tomorrowmonth,$tomorrowday,$tomorrowhour,$tomorrowmin,$tomorrowsec) { $field=sprintf("%02d",$field); }
# Same YYYYMMDDHHMMSS format as the one built for each log record
my $timetomorrow=$tomorrowyear.$tomorrowmonth.$tomorrowday.$tomorrowhour.$tomorrowmin.$tomorrowsec;
364             
# Init other parameters
# 8192 becomes 8191, used as a bit mask on the line counter for -showsteps
$NBOFLINESFORBENCHMARK--;
$DirCgi='' if ($ENV{"GATEWAY_INTERFACE"});
# Add a trailing / to DirCgi if it does not already end with / or \
if ($DirCgi && $DirCgi !~ /\/$/ && $DirCgi !~ /\\$/) { $DirCgi .= '/'; }
# If not defined or choosed to "." value then DirData is directory of script,
# and if this one is also empty then we put it to "."
$DirData=$DIR if (! $DirData || $DirData eq '.');
$DirData='.' if (! $DirData);
$DirData =~ s/\/$//;

#my %monthlib =  ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
# Each month is stored twice: "Jan" and "jan" both give "01"
my %monthnum = ();
my $monthindex=0;
foreach my $monthname (qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/) {
	my $twodigits=sprintf("%02d",++$monthindex);
	$monthnum{$monthname}=$twodigits;
	$monthnum{lc($monthname)}=$twodigits;
}
376             
# Load static DNS cache file into %MyDNSTable. Each line is split on spaces
# into time, ip and name. Lines without ip and name are ignored.
if ($DNSCache) {
	if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
	# 3 args open: file name is never read as an open mode or a command
	open(my $cachehandle, '<', $DNSCache) or error("Can't open cache file $DNSCache : $!");
	while (my $cacheline = <$cachehandle>) {
		my (undef, $ip, $name) = split(' ', $cacheline);
		next if (! $ip || ! $name);
		# A '*' name is stored as the ip itself, so the entry is a true value
		# and the record is written with its ip unchanged.
		if ($name eq '*') { $name="$ip"; }
		$MyDNSTable{$ip}=$name;
	}
	close $cachehandle;
}
389             
390             #-----------------------------------------------------------------------------
391             # PROCESSING CURRENT LOG(s)
392             #-----------------------------------------------------------------------------
my %LogFileToDo=();			# file number => name given to open() for this log file
my $NbOfLinesRead=0;
my $NbOfLinesParsed=0;
my $logfilechosen=0;
my $starttime=time();

# Return name to use to open a log file: the file name itself, or a
# "command file |" pipe when file name matches a supported compression.
my $GetNameToOpen = sub {
	my $file=shift;
	if ($file =~ /$zcat_file/) {
		if ($Debug) { debug("GZIP compression detected for Log file $file."); }
		# Modify the name to include the zcat command
		return $zcat . ' ' . $file . ' |';
	}
	if ($file =~ /$bzcat_file/) {
		if ($Debug) { debug("BZ2 compression detected for Log file $file."); }
		# Modify the name to include the bzcat command
		return $bzcat . ' ' . $file . ' |';
	}
	return $file;
};

# Define the LogFileToDo list
$cpt=1;
foreach my $key (0..(@ParamFile-1)) {
	if ($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) {
		# Plain file name
		if ($Debug) { debug("Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }

		# Check for supported compression
		$ParamFile[$key] = $GetNameToOpen->($ParamFile[$key]);

		$LogFileToDo{$cpt}=$ParamFile[$key];
		$cpt++;
	}
	else {
		# Name with wildcards: split directory and file pattern, then change
		# pattern into a regex (. is literal, * is any string, ? is any char)
		my $DirFile=$ParamFile[$key]; $DirFile =~ s/([^\/\\]*)$//;
		$ParamFile[$key] = $1;
		if ($DirFile eq '') { $DirFile = '.'; }
		$ParamFile[$key] =~ s/\./\\\./g;
		$ParamFile[$key] =~ s/\*/\.\*/g;
		$ParamFile[$key] =~ s/\?/\./g;
		if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }
		if (! opendir(DIR,"$DirFile")) {
			# Nothing can match in a directory we can't read. Other parameters are still processed.
			print STDERR "Warning: Couldn't open directory \"$DirFile\" : $!\n";
			next;
		}
		my @filearray = sort readdir DIR;
		closedir DIR;
		foreach my $i (0..$#filearray) {
			if ("$filearray[$i]" =~ /^$ParamFile[$key]$/ && "$filearray[$i]" ne "." && "$filearray[$i]" ne "..") {
				if ($Debug) { debug("Log file $filearray[$i] is added to LogFileToDo with number $cpt."); }
				# Compressed files found by a wildcard are read like the ones given by name
				$LogFileToDo{$cpt}=$GetNameToOpen->("$DirFile/$filearray[$i]");
				$cpt++;
			}
		}
	}
}
440             
# If no files to process
error("No input log file found") if (scalar(keys %LogFileToDo) == 0);

# Open all log files. File number n is opened on the file handle named LOGn.
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
foreach my $logfilenb (keys %LogFileToDo) {
	my $handlename="LOG$logfilenb";
	if ($Debug) { debug("Open log file number $logfilenb: \"$LogFileToDo{$logfilenb}\""); }
	# 2 args open is needed here: name can be a "command file |" pipe
	if (! open($handlename,"$LogFileToDo{$logfilenb}")) {
		error("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
	}
	binmode $handlename;	# To avoid pb of corrupted text log files with binary chars.
}
453             
454             $QueueCursor=1;
455             while (1 == 1)
456             {
457             	# BEGIN Read new record (for each log file or only for log file with record just processed)
458             	#------------------------------------------------------------------------------------------
459             	foreach my $logfilenb (keys %LogFileToDo) {
460             		if (($logfilechosen == 0) || ($logfilechosen == $logfilenb)) {
461             			if ($Debug) { debug("Search next record in file number $logfilenb",3); }
462             			# Read chosen log file until we found a record with good date or reaching end of file
463 rizwank 1.1 			while (1 == 1) {
464             				my $LOG="LOG$logfilenb";
465             				$_=<$LOG>;	# Read new line
466             				if (! $_) {							# No more records in log file number $logfilenb
467             					if ($Debug) { debug(" No more records in file number $logfilenb",2); }
468             					delete $LogFileToDo{$logfilenb};
469             					last;
470             				}
471             
472             				$NbOfLinesRead++;
473             				chomp $_; s/\r$//;
474             
475             				if (/^#/) { next; }									# Ignore comment lines (ISS writes such comments)
476             				if (/^!!/) { next; }								# Ignore comment lines (Webstar writes such comments)
477             				if (/^$/) { next; }									# Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)
478             
479             				$linerecord{$logfilenb}=$_; 
480             
481             				# Check filters
482             				#----------------------------------------------------------------------
483             
484 rizwank 1.1 				# Split DD/Month/YYYY:HH:MM:SS or YYYY-MM-DD HH:MM:SS or MM/DD/YY\tHH:MM:SS
485             				my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
486             				if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
487             				elsif ($_ =~ /\[(\d\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
488             				elsif ($_ =~ /\[\w+ (\w+) (\d\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)\]/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }
489             
490             				if ($monthnum{$month}) { $month=$monthnum{$month}; }	# Change lib month in num month if necessary
491             
492             				# Create $timerecord like YYYYMMDDHHMMSS
493             		 		$timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
494             				if ($timerecord{$logfilenb}<10000000000000) {
495             					if ($Debug) { debug(" This record is corrupted (no date found)",3); }
496             					$corrupted{$logfilenb}++;
497             					next;
498             				}
499             				if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }
500             				last;
501             			}
502             		}
503             	}
504             	# END Read new lines for each log file. After this, following var are filled
505 rizwank 1.1 	# $timerecord{$logfilenb}
506             
507             	# We choose which record of which log file to process
508             	if ($Debug) { debug("Choose which record of which log file to process",3); }
509             	$logfilechosen=-1;
510             	my $timeref="99999999999999";
511             	foreach my $logfilenb (keys %LogFileToDo) {
512             		if ($Debug) { debug(" timerecord for file $logfilenb is $timerecord{$logfilenb}",4); }
513             		if ($timerecord{$logfilenb} < $timeref) { $logfilechosen=$logfilenb; $timeref=$timerecord{$logfilenb} }
514             	}
515             	if ($logfilechosen <= 0) { last; }								# No more record to process
516             	# Record is chosen
517             	if ($Debug) { debug(" We choosed to qualify record of file number $logfilechosen",3); }
518             	if ($Debug) { debug("  Record is $linerecord{$logfilechosen}",3); }
519             			
520             	# Record is approved. We found a new line to parse in file number $logfilechosen
521             	#-------------------------------------------------------------------------------
522             	$NbOfLinesParsed++;
523             	if ($ShowSteps) {
524             		if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
525             			my $delay=(time()-$starttime)||1;
526 rizwank 1.1 			print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
527             		}
528             	}
529             
530             	# Do DNS lookup
531             	#--------------------
532             	my $Host='';
533             	my $ip=0;
534             	if ($DNSLookup) {			# DNS lookup is 1 or 2
535             		if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }	# IPv4
536             		elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }						# IPv6
537             		if ($ip) {
538             			# Check in static DNS cache file
539             			if ($MyDNSTable{$Host}) {
540             				if ($Debug) { debug("  DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
541             			}
542             			elsif ($DNSLookup==1) {
543             				# Check in session cache (dynamic DNS cache file + session DNS cache)
544             				if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
545             					if (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
546             						$TmpDNSLookup{$Host}='*';
547 rizwank 1.1 						if ($Debug) { debug("  No need of reverse DNS lookup for $Host, skipped at user request.",4); }
548             					}
549             					else {
550             						if ($ip == 4) {
551             							# Create or not a new thread
552             							if ($MaxNbOfThread) {
553             								if (! $threadarray{$Host}) {	# No thread already launched for $Host
554             									while ((scalar keys %threadarray) >= $MaxNbOfThread) {
555             										if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
556             										sleep 1;
557             									}
558             									$threadarray{$Host}=1;		# Semaphore to tell thread for $Host is active
559             #									my $t = new Thread \&MakeDNSLookup, $Host;
560             									my $t = threads->create(sub { MakeDNSLookup($Host) });
561             									if (! $t) { error("Failed to create new thread"); }
562             									if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
563             									$t->detach();	# We don't need to keep return code
564             								}
565             								else {
566             									if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
567             								}
568 rizwank 1.1 								# Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
569             							} else {
570             								&MakeDNSLookup($Host);
571             								if ($Debug) { debug("  Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
572             							}				
573             						}
574             						elsif ($ip == 6) {
575             							$TmpDNSLookup{$Host}='*';
576             							if ($Debug) { debug("  Reverse DNS lookup for $Host not available for IPv6",4); }
577             						}
578             					}
579             				} else {
580             					if ($Debug) { debug("  Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
581             				}
582             			}
583             			else {
584             				if ($Debug) { debug("  DNS lookup by static DNS cache file asked for $Host but not found.",4); }
585             			}
586             		}
587             		else {
588             			if ($Debug) { debug("  DNS lookup asked for $Host but this is not an IP address.",4); }
589 rizwank 1.1 			$DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
590             		}
591             	}
592             	else {
593             		if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }	# IPv4
594             		elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }						# IPv6
595             		if ($Debug) { debug("  No DNS lookup asked.",4); }
596             	}
597             
598             	# Put record in record queue
599             	if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
600             	$QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};
601             
602             	# Put record in host queue
603             	# If there is a host to resolve, we add line to queue with value of host to resolve
604             	# $Host is '' (no ip found) or is ip
605             	if ($DNSLookup==0) {
606             		$QueueHostsToResolve{$NbOfLinesParsed}='*';
607             	}
608             	if ($DNSLookup==1) { 
609             		$QueueHostsToResolve{$NbOfLinesParsed}=$Host?$Host:'*';
610 rizwank 1.1 	}
611             	if ($DNSLookup==2) {
612             		$QueueHostsToResolve{$NbOfLinesParsed}=$MyDNSTable{$Host}?$Host:'*';
613             	}
614             
615             	# Print all records in head of queue that are ready
616             	&WriteRecordsReadyInQueue($logfilechosen);
617             	
618             }	# End of processing new record. Loop on next one.
619             
debug("End of processing log file(s)") if $Debug;

# Release every input: each log file is closed through its symbolic handle
# name, built as "LOG" followed by the file number.
for my $filenb (keys %LogFileToDo) {
	debug("Close log file number $filenb") if $Debug;
	my $handlename = "LOG$filenb";
	unless (close($handlename)) {
		# A failing close is reported through error() with the file's source
		error("Command for pipe '$LogFileToDo{$filenb}' failed");
	}
}
627             
# Drain whatever is still waiting in the output queue.
# The loop tests "the head of the queue still exists" rather than "the head of
# the queue is still unresolved": when lookups run in threads, a lookup can
# complete between the last flush and the loop test, and testing for
# "unresolved" would then leave the loop with ready records never written.
# NOTE(review): relies on WriteRecordsReadyInQueue consuming every ready head
# entry and moving $QueueCursor forward -- confirm against its definition.
while ($QueueHostsToResolve{$QueueCursor}) {
	# Print all records in head of queue that are ready
	&WriteRecordsReadyInQueue($logfilechosen);
	# Head entry still present: its lookup is pending, give it some time
	if ($QueueHostsToResolve{$QueueCursor}) { sleep 1; }
}
633             
# Waiting queue is empty: join every thread still reported by threads->list()
if ($MaxNbOfThread) {
	foreach my $t (threads->list()) {
		# Log the numeric thread id (as done when a lookup thread is created);
		# interpolating the thread object would only print a reference address
		if ($Debug) { debug("Join thread ".$t->tid); }
		$t->join();
	}
}
641             
# DNSLookup warning
# The main loop stores a log file name in $DNSLookupAlreadyDone when a record
# of that file showed no IP address while a DNS lookup was asked. If that
# happened and -dnslookup is in full lookup mode (1), tell the user.
warning(
	"Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed."
) if ($DNSLookupAlreadyDone && $DNSLookup==1);
646             
# Final counters, reported in debug mode only
if ($Debug) {
	foreach my $summary (
		"Total nb of read lines: $NbOfLinesRead",
		"Total nb of parsed lines: $NbOfLinesParsed",
		"Total nb of DNS lookup asked: $NbOfDNSLookupAsked"
	) {
		debug($summary);
	}
}

# Disabled code: would dump the session DNS lookups into the $DNSCache file
#if ($DNSCache) {
#	open(CACHE, ">$DNSCache") or die;
#	foreach (keys %TmpDNSLookup) {
#		$TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip";
#		print CACHE "0\t$_\t$TmpDNSLookup{$_}\n";
#	}
#	close CACHE;
#}

0;	# Last statement of the script. Do not remove this line

Rizwan Kassim
Powered by
ViewCVS 0.9.2