#!/usr/bin/perl
#-----------------------------------------------------------------------------
# Allows you to get one unique output log file, sorted on date,
# built from particular sources.
# This tool is part of AWStats log analyzer but can be used
# alone for any other log analyzer.
# See COPYING.TXT file about AWStats GNU General Public License.
#-----------------------------------------------------------------------------
# $Revision: 1.31 $ - $Author: eldy $ - $Date: 2004/11/27 14:32:00 $

use strict; no strict "refs";
#use diagnostics;

#-----------------------------------------------------------------------------
# Defines
#-----------------------------------------------------------------------------

# Threading switch and DNS bookkeeping. Exactly one of the two marked blocks
# below must be active; the second one is the same set of declarations with
# the variables shared between threads.
#   $UseThread          1 when the threaded variant of the script is enabled
#   $NbOfDNSLookupAsked number of calls made to MakeDNSLookup
#   %threadarray        hosts for which a lookup thread is currently running
#   %MyDNSTable         ip => name pairs loaded from the -dnscache file
#   %TmpDNSLookup       ip => name (or '*') results obtained during this run
# Check_Thread_Use is called right after $UseThread is set because it exits
# when -dnslookup=n is requested while threading is disabled.

# ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
my $UseThread=0;
&Check_Thread_Use();
my $NbOfDNSLookupAsked = 0;
my %threadarray = ();
my %MyDNSTable = ();
my %TmpDNSLookup = ();

# ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
#my $UseThread=1;
#&Check_Thread_Use();
#my $NbOfDNSLookupAsked : shared = 0;
#my %threadarray : shared = ();
#my %MyDNSTable : shared = ();
#my %TmpDNSLookup : shared = ();


# ---------- Init variables --------
# $REVISION is the bare number taken out of the CVS keyword string.
use vars qw/ $REVISION $VERSION /;
($REVISION) = '$Revision: 1.31 $' =~ /\s(.*)\s/;
$VERSION = "1.2 (build $REVISION)";

# Period, in parsed lines, of the -showsteps progress report.
use vars qw/ $NBOFLINESFORBENCHMARK /;
$NBOFLINESFORBENCHMARK = 8192;

use vars qw/
$DIR $PROG $Extension
$Debug $ShowSteps $AddFileNum
$MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
$NbOfLinesShowsteps $AFINET $QueueCursor
/;
# String settings start empty, numeric settings and counters start at 0.
# $QueueCursor is deliberately left alone here; it is set just before the
# merge loop starts.
$DIR = $PROG = $Extension = '';
$DNSCache = $DirCgi = $DirData = '';
$AFINET = '';
$Debug = $ShowSteps = $AddFileNum = 0;
$MaxNbOfThread = $DNSLookup = 0;
$DNSLookupAlreadyDone = 0;
$NbOfLinesShowsteps = 0;

# ---------- Init arrays --------
use vars qw/
@SkipDNSLookupFor
@ParamFile
/;
# ---------- Init hash arrays --------
use vars qw/
%linerecord %timerecord %corrupted
%QueueHostsToResolve %QueueRecords
/;
%linerecord = ();
%timerecord = ();
%corrupted = ();
%QueueHostsToResolve = ();
%QueueRecords = ();

# ---------- External Program variables ----------
# Decompression command and file name pattern, for gzip then for bz2.
my ($zcat,  $zcat_file)  = ('zcat',  '\.gz$');
my ($bzcat, $bzcat_file) = ('bzcat', '\.bz2$');


#-----------------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------------

#------------------------------------------------------------------------------
# Function:     Write an error message and exit
# Parameters:   $message (the "Error: " prefix, final "." and newline are
#               added here)
# Input:        None
# Output:       Message on STDERR
# Return:       Never returns; the process exits with status 1
#------------------------------------------------------------------------------
sub error {
    my $message = $_[0];
    # Standard output carries the merged log records, so the diagnostic goes
    # to STDERR to keep it out of the data read by the log analyzer.
    print STDERR "Error: $message.\n";
    exit 1;
}

#------------------------------------------------------------------------------
# Function:     Write a debug message
# Parameters:   $message
#               $level (optional, a missing or zero level is treated as 1)
# Input:        $Debug (message is printed only when $Debug >= $level)
# Output:       "DEBUG <level> - <local time> : <message>" on current output
# Return:       None
#------------------------------------------------------------------------------
sub debug {
    my ($debugstring, $level) = @_;
    $level ||= 1;
    return unless $Debug >= $level;
    my $stamp = localtime(time());      # scalar context: readable date string
    print "DEBUG $level - $stamp : $debugstring\n";
}

#------------------------------------------------------------------------------
# Function:     Write a warning message
# Parameters:   $message
# Input:        $Debug (when set, the message is also sent through debug())
# Output:       Message on STDERR
# Return:       None
#------------------------------------------------------------------------------
sub warning {
    my $messagestring=shift;
    if ($Debug) { debug("$messagestring",1); }
    # Standard output carries the merged log records, so the warning goes to
    # STDERR to keep it out of the data read by the log analyzer.
    print STDERR "$messagestring\n";
}

#-----------------------------------------------------------------------------
# Function:     Return 1 if string is not empty and only made of word chars
#               (alphanum and _), white space or + - / \ . % , ; : = " ' & ? !
# Input:        String
#               $Debug (level 5 traces the call and its result)
# Return:       0 or 1
#-----------------------------------------------------------------------------
sub IsAscii {
    my $string=shift;
    if ($Debug) { debug("IsAscii($string)",5); }
    my $accepted = ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) ? 1 : 0;
    if ($Debug) { debug(($accepted ? " Yes" : " No"),5); }
    return $accepted;
}

#-----------------------------------------------------------------------------
# Function:     Tell if reverse DNS lookup must be skipped for a host
# Parameters:   Host (ip address) to check
# Input:        @SkipDNSLookupFor (list of regex, matched case-insensitively)
# Return:       1 if host matches one entry of @SkipDNSLookupFor, 0 otherwise
#-----------------------------------------------------------------------------
sub SkipDNSLookup {
    my $host = $_[0];
    foreach my $pattern (@SkipDNSLookupFor) {
        return 1 if $host =~ /$pattern/i;
    }
    return 0;    # Not in @SkipDNSLookupFor
}

#-----------------------------------------------------------------------------
# Function:     Make the reverse DNS lookup of one IPv4 address and wait for
#               its result (can be run inside a thread)
# Parameters:   IPv4 address in dotted notation
# Input:        $MaxNbOfThread $Debug
# Output:       $TmpDNSLookup{ip} is set to the host name, or to '*' when the
#               lookup returned nothing, returned an IPv4 address, or returned
#               a name rejected by IsAscii
#               $NbOfDNSLookupAsked is incremented
#               $threadarray{ip} is removed (lookup no longer running)
# Return:       None
#-----------------------------------------------------------------------------
sub MakeDNSLookup {
    my $ipaddress=shift;
    use Socket;                 # Imported at compile time, provides AF_INET
    $AFINET=AF_INET;
    $NbOfDNSLookupAsked++;

    # threads module is only loaded in threaded mode, hence the string eval
    my $tid=0;
    if ($MaxNbOfThread) { $tid=eval("threads->self->tid()"); }
    if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4); }

    my $packedaddress=pack("C4",split(/\./,$ipaddress));
    my $lookupresult=gethostbyaddr($packedaddress,$AFINET);    # This is very slow, may took 20 seconds

    my $unusable = (! $lookupresult
        || $lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
        || ! IsAscii($lookupresult));
    $TmpDNSLookup{$ipaddress} = $unusable ? '*' : $lookupresult;

    if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4); }
    delete $threadarray{$ipaddress};
    return;
}

#-----------------------------------------------------------------------------
# Function:     Print, in queue order, all records at head of queue whose host
#               is resolved. Stops at first record still waiting for its
#               lookup (or at end of queue).
# Parameters:   $logfilechosen, printed in front of each record when
#               $AddFileNum is set
# Input:        $QueueCursor %QueueRecords %QueueHostsToResolve
#               %MyDNSTable %TmpDNSLookup $AddFileNum $Debug
# Output:       Records on standard output. Printed entries are removed from
#               both queues and $QueueCursor is moved after them.
# Return:       0
# NOTE(review): the file number printed is the one passed by the caller, not
# one stored with the queued record. Confirm this is what is wanted when
# records wait in queue (threaded lookups).
#-----------------------------------------------------------------------------
sub WriteRecordsReadyInQueue {
    my $logfilechosen=shift;
    if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
    while (1) {
        my $host=$QueueHostsToResolve{$QueueCursor};
        last unless $host;                          # Queue is empty

        if ($host eq '*') {
            # Nothing to resolve for this record
            if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
        }
        elsif ($MyDNSTable{$host}) {
            if ($MyDNSTable{$host} ne '*') {
                # \Q..\E: host is a literal string, dots of an IP address
                # must not match any other char
                $QueueRecords{$QueueCursor}=~s/\Q$host\E/$MyDNSTable{$host}/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in MyDNSTable $MyDNSTable{$host}). We pull it.",4); }
            }
        }
        elsif ($TmpDNSLookup{$host}) {
            if ($TmpDNSLookup{$host} ne '*') {
                $QueueRecords{$QueueCursor}=~s/\Q$host\E/$TmpDNSLookup{$host}/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in TmpDNSLookup $TmpDNSLookup{$host}). We pull it.",4); }
            }
        }
        else {
            last;                                   # Head of queue is not resolved yet
        }

        # Record is ready, we output it.
        if ($AddFileNum) { print "$logfilechosen $QueueRecords{$QueueCursor}\n"; }
        else { print "$QueueRecords{$QueueCursor}\n"; }
        delete $QueueRecords{$QueueCursor};
        delete $QueueHostsToResolve{$QueueCursor};
        $QueueCursor++;
    }
    return 0;
}

#-----------------------------------------------------------------------------
# Function:     Check if threads are enabled or not. Does nothing with a Perl
#               older than 5.8 or when no -dnslookup=n / -dnslookup:n option
#               is found in @ARGV.
# Input:        @ARGV $UseThread
# Output:       Loads 'threads' and 'threads::shared' modules if $UseThread
#               is set. Exits through error() if a module can not be loaded
#               or if $UseThread is not set.
# Return:       -
#-----------------------------------------------------------------------------
sub Check_Thread_Use {
    return if $] < 5.008;
    foreach my $arg (@ARGV) {
        next unless $arg =~ /^-dnslookup[:=](\d{1,2})/i;
        if (! $UseThread) {
            error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading");
        }
        else {
            if (!eval ('require "threads.pm";')) { error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":"")); }
            if (!eval ('require "threads/shared.pm";')) { error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":"")); }
        }
    }
}


#-----------------------------------------------------------------------------
# MAIN
#-----------------------------------------------------------------------------
# Split $0 into directory ($DIR), program name ($PROG) and extension.
# $Extension stays empty when the script name has no extension.
($DIR=$0) =~ s/([^\/\\]*)$//;
$PROG=$1;
$Extension = ($PROG =~ s/\.([^\.]*)$//) ? $1 : '';

# Get parameters (Note: $MaxNbOfThread is already known)
# Arguments starting with '-' are options, all others are log files (or
# wildcards) stored in @ParamFile in the order they were given.
my $cpt=1;              # File counter, set again when the file list is built
foreach my $arg (@ARGV) {
    if ($arg !~ /^-/) {
        push @ParamFile, $arg;
        next;
    }
    if ($arg =~ /debug=(\d)/i) { $Debug=$1; }
    # File name is everything after "dnscache=", whatever the case used for
    # the option name
    elsif ($arg =~ /dnscache=(.*)\z/is) { $DNSLookup||=2; $DNSCache=$1; }
    elsif ($arg =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
    elsif ($arg =~ /dnslookup/i) { $DNSLookup||=1; }
    elsif ($arg =~ /showsteps/i) { $ShowSteps=1; }
    elsif ($arg =~ /addfilenum/i) { $AddFileNum=1; }
    else { print "Unknown argument $arg ignored\n"; }
}
# In debug mode, output is not buffered and the run starts with a summary of
# the program version and of the DNS options in use.
if ($Debug) {
    $|=1;
    debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
    debug("DNSLookup=$DNSLookup");
    debug("DNSCache=$DNSCache");
    debug("MaxNbOfThread=$MaxNbOfThread");
}

# Disallow MaxNbOfThread and Perl < 5.8
if ($MaxNbOfThread && $] < 5.008) {
    error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead");
}

# Warning, there is a memory hole in ActiveState perl version (in delete functions)
if ($^X =~ /activestate/i || $^X =~ /activeperl/i) {
    # TODO Add a warning

}

# Without any log file on command line, print the help text on standard
# output and stop with exit code 0.
if (scalar @ParamFile == 0) {
    print <<"END_OF_USAGE";
----- $PROG $VERSION (c) Laurent Destailleur -----
$PROG allows you to get one unique output log file, sorted on date,
built from particular sources:
 - It can read several input log files,
 - It can read .gz/.bz2 log files,
 - It can also makes a fast reverse DNS lookup to replace
 all IP addresses into host names in resulting log file.
$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software
distributed with a GNU General Public License (See COPYING.txt file).
$PROG is part of AWStats but can be used alone as a log merger
or resolver before using any other log analyzer.

Usage:
 $PROG.$Extension [options] file
 $PROG.$Extension [options] file1 ... filen
 $PROG.$Extension [options] *.*
 perl $PROG.$Extension [options] *.* > newfile
Options:
 -dnslookup make a reverse DNS lookup on IP adresses
 -dnslookup=n same with a n parallel threads instead of serial requests
 -dnscache=file make DNS lookup from cache file first before network lookup
 -showsteps print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines
 -addfilenum if used with several files, file number can be added in first
 field of output file. This can be used to add a cluster id
 when log files come from several load balanced computers.

This runs $PROG in command line to open one or several
server log files to merge them (sorted on date) and/or to make a reverse
DNS lookup (if asked). The result log file is sent on standard output.
Note: $PROG is not a 'sort' tool to sort one file. It's a
software able to output sorted log records (with a reverse DNS lookup
included or not) even if log records are dispatched in several files.
Each of thoose files must be already independently sorted itself
(but that is the case in all web server log files). So you can use it
for load balanced log files or to group several old log files.

Don't forget that the main goal of logresolvemerge is to send log records to
a log analyzer in a sorted order without merging files on disk (NO NEED
OF DISK SPACE AT ALL) and without loading files into memory (NO NEED
OF MORE MEMORY). Choose of output records is done on the fly.

So logresolvemerge is particularly usefull when you want to output several
and/or large log files in a fast process, with no use of disk or
more memory, and in a chronological order through a pipe (to be used by a log
analyzer).

Note: If input records are not 'exactly' sorted but 'nearly' sorted (this
occurs with heavy servers), this is not a problem, the output will also
be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal
with such logs.

WARNING: If log files are old MAC text files (lines ended with CR char), you
can't run this tool on Win or Unix platforms.

WARNING: Because of important memory holes in ActiveState Perl version, use
another Perl interpreter if you need to process large lof files.

Now supports/detects:
 Automatic detection of log format
 Files can be .gz/.bz2 files if zcat/bzcat tools are available in PATH.
 Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.
New versions and FAQ at http://awstats.sourceforge.net
END_OF_USAGE
    exit 0;
}

# Get current time
# localtime returns the year as a number of years since 1900 and the month
# as 0..11. All fields are then stored as zero padded strings.
my $nowtime=time;
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
$nowyear+=1900;
my $nowsmallyear=substr($nowyear,2);
($nowmonth,$nowday,$nowhour,$nowmin,$nowsec) = map { sprintf("%02d",$_) } ($nowmonth+1,$nowday,$nowhour,$nowmin,$nowsec);
# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
$tomorrowyear+=1900;
my $tomorrowsmallyear=substr($tomorrowyear,2);
($tomorrowmonth,$tomorrowday,$tomorrowhour,$tomorrowmin,$tomorrowsec) = map { sprintf("%02d",$_) } ($tomorrowmonth+1,$tomorrowday,$tomorrowhour,$tomorrowmin,$tomorrowsec);
# Same YYYYMMDDHHMMSS layout as the time stamps built for log records
my $timetomorrow=$tomorrowyear.$tomorrowmonth.$tomorrowday.$tomorrowhour.$tomorrowmin.$tomorrowsec;

# Init other parameters
# Period becomes a bit mask: the progress test is ((counter & mask) == 0)
$NBOFLINESFORBENCHMARK--;
if ($ENV{"GATEWAY_INTERFACE"}) { $DirCgi=''; }
if ($DirCgi && !($DirCgi =~ /\/$/) && !($DirCgi =~ /\\$/)) { $DirCgi .= '/'; }
if (! $DirData || $DirData eq '.') { $DirData=$DIR; }   # If not defined or choosed to "." value then DirData is current dir
if (! $DirData) { $DirData='.'; }                       # If current dir not defined then we put it to "."
$DirData =~ s/\/$//;

#my %monthlib = ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
# Each month is known as "Jan" and as "jan", both giving "01".
my %monthnum = ();
{
    my $number=0;
    foreach my $name (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
        $number++;
        $monthnum{$name} = $monthnum{lc $name} = sprintf("%02d",$number);
    }
}

# Load the static DNS cache file given with -dnscache into %MyDNSTable.
# Each useful line has 3 fields separated by white spaces: time, ip, name.
# A name of '*' is stored as the ip itself; lines without an ip and a name
# are ignored.
if ($DNSCache) {
    if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
    # 3 arguments open: file name comes from command line and must never be
    # read as an open mode or as a command
    open(my $cachehandle, '<', $DNSCache) or error("Can't open cache file $DNSCache: $!");
    while (my $cacheline = <$cachehandle>) {
        my ($time, $ip, $name) = split(' ', $cacheline);
        next unless $ip && $name;
        $MyDNSTable{$ip} = ($name eq '*') ? "$ip" : $name;
    }
    close $cachehandle;
}

#-----------------------------------------------------------------------------
# PROCESSING CURRENT LOG(s)
#-----------------------------------------------------------------------------
my %LogFileToDo=();         # File number => string given to open() for this file
my $NbOfLinesRead=0;
my $NbOfLinesParsed=0;
my $logfilechosen=0;        # 0 = a record must be read in every file
my $starttime=time();

# Return the string to give to open() for a log file: the file name itself,
# or "command file |" when file name shows a supported compression.
# NOTE(review): file name is put unquoted in the command run through the
# shell, so names with spaces or shell special chars will not work.
my $BuildOpenString = sub {
    my $filename=shift;
    if ($filename =~ /$zcat_file/) {
        if ($Debug) { debug("GZIP compression detected for Log file $filename."); }
        return $zcat . ' ' . $filename . ' |';
    }
    if ($filename =~ /$bzcat_file/) {
        if ($Debug) { debug("BZ2 compression detected for Log file $filename."); }
        return $bzcat . ' ' . $filename . ' |';
    }
    return $filename;
};

# Define the LogFileToDo list
$cpt=1;
foreach my $param (@ParamFile) {
    if ($param !~ /\*/ && $param !~ /\?/) {
        # Plain file name
        if ($Debug) { debug("Log file $param is added to LogFileToDo with number $cpt."); }
        $LogFileToDo{$cpt}=$BuildOpenString->($param);
        $cpt++;
        next;
    }

    # Name with wildcards: split it into directory and file mask
    my $DirFile=$param;
    $DirFile =~ s/([^\/\\]*)$//;
    my $filemask=$1;
    if ($DirFile eq '') { $DirFile = '.'; }
    # Mask is changed into a regex: every char is taken literally, except
    # '*' (any string) and '?' (any single char)
    my $filepattern=quotemeta($filemask);
    $filepattern =~ s/\\\*/.*/g;
    $filepattern =~ s/\\\?/./g;
    if ($Debug) { debug("Search for file \"$filepattern\" into \"$DirFile\""); }
    if (! opendir(DIR,$DirFile)) {
        print STDERR "Warning: Couldn't open directory \"$DirFile\" : $!\n";
        next;
    }
    my @filearray = sort readdir DIR;
    closedir DIR;
    foreach my $filename (@filearray) {
        next if $filename eq '.' || $filename eq '..';
        next if $filename !~ /^$filepattern$/;
        next if -d "$DirFile/$filename";        # A directory is not a log file
        if ($Debug) { debug("Log file $filename is added to LogFileToDo with number $cpt."); }
        $LogFileToDo{$cpt}=$BuildOpenString->("$DirFile/$filename");
        $cpt++;
    }
}

# If no files to process
if (scalar keys %LogFileToDo == 0) {
    error("No input log file found");
}

# Open all log files
# File number n is read through the handle named "LOGn".
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
foreach my $logfilenb (keys %LogFileToDo) {
    if ($Debug) { debug("Open log file number $logfilenb: \"$LogFileToDo{$logfilenb}\""); }
    open("LOG$logfilenb","$LogFileToDo{$logfilenb}") || error("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
    binmode "LOG$logfilenb";    # To avoid pb of corrupted text log files with binary chars.
}

# Merge loop. Each pass:
#  1) reads the next dated record of every file on first pass, then only of
#     the file whose record was just consumed,
#  2) chooses the file holding the oldest pending record,
#  3) starts (or finds) the DNS resolution of the host of this record,
#  4) adds the record at end of queue and prints all records at head of
#     queue that are ready.
# Loop ends when no file has a pending record.
$QueueCursor=1;
while (1 == 1)
{
    # BEGIN Read new record (for each log file or only for log file with record just processed)
    #------------------------------------------------------------------------------------------
    foreach my $logfilenb (keys %LogFileToDo) {
        next unless (($logfilechosen == 0) || ($logfilechosen == $logfilenb));
        if ($Debug) { debug("Search next record in file number $logfilenb",3); }
        my $LOG="LOG$logfilenb";
        # Read chosen log file until we found a record with good date or reaching end of file
        while (1 == 1) {
            my $line=<$LOG>;    # Read new line
            # End of file is an undefined value. A false value is not enough:
            # a last line made of "0" with no end of line is false too.
            if (! defined $line) {    # No more records in log file number $logfilenb
                if ($Debug) { debug(" No more records in file number $logfilenb",2); }
                delete $LogFileToDo{$logfilenb};
                last;
            }

            $NbOfLinesRead++;
            chomp $line; $line =~ s/\r$//;

            if ($line =~ /^#/) { next; }     # Ignore comment lines (ISS writes such comments)
            if ($line =~ /^!!/) { next; }    # Ignore comment lines (Webstar writes such comments)
            if ($line =~ /^$/) { next; }     # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)

            $linerecord{$logfilenb}=$line;

            # Check filters
            #----------------------------------------------------------------------

            # Split YYYY-MM-DD HH:MM:SS or [DD/Month/YYYY:HH:MM:SS or [Day Month DD HH:MM:SS YYYY]
            my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
            if ($line =~ /(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
            elsif ($line =~ /\[(\d\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
            elsif ($line =~ /\[\w+ (\w+) (\d\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)\]/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }

            if ($monthnum{$month}) { $month=$monthnum{$month}; }    # Change lib month in num month if necessary

            # Create $timerecord like YYYYMMDDHHMMSS
            # A record with no date (or an unknown month name) gives a
            # number with less than 14 digits
            $timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
            if ($timerecord{$logfilenb}<10000000000000) {
                if ($Debug) { debug(" This record is corrupted (no date found)",3); }
                $corrupted{$logfilenb}++;
                next;
            }
            if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }
            last;
        }
    }
    # END Read new lines for each log file. After this, following var are filled
    # $timerecord{$logfilenb}

    # We choose which record of which log file to process
    # (the one with the lowest time stamp among files not yet at end)
    if ($Debug) { debug("Choose which record of which log file to process",3); }
    $logfilechosen=-1;
    my $timeref="99999999999999";
    foreach my $logfilenb (keys %LogFileToDo) {
        if ($Debug) { debug(" timerecord for file $logfilenb is $timerecord{$logfilenb}",4); }
        if ($timerecord{$logfilenb} < $timeref) { $logfilechosen=$logfilenb; $timeref=$timerecord{$logfilenb} }
    }
    if ($logfilechosen <= 0) { last; }    # No more record to process
    # Record is chosen
    if ($Debug) { debug(" We choosed to qualify record of file number $logfilechosen",3); }
    if ($Debug) { debug(" Record is $linerecord{$logfilechosen}",3); }

    # Record is approved. We found a new line to parse in file number $logfilechosen
    #-------------------------------------------------------------------------------
    $NbOfLinesParsed++;
    if ($ShowSteps) {
        if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
            my $delay=(time()-$starttime)||1;
            print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
        }
    }

    # Do DNS lookup
    #--------------------
    # $Host is the first thing looking like an IP address in the record.
    # NOTE(review): the IPv6 test only looks for hex digits followed by ':',
    # so it also matches the "2004:" of an Apache date. Confirm before
    # relying on $ip == 6 or on the "not an IP address" branch.
    my $Host='';
    my $ip=0;
    if ($DNSLookup) {    # DNS lookup is 1 or 2
        if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }    # IPv4
        elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }                        # IPv6
        if ($ip) {
            # Check in static DNS cache file
            if ($MyDNSTable{$Host}) {
                if ($Debug) { debug(" DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
            }
            elsif ($DNSLookup==1) {
                # Check in session cache (dynamic DNS cache file + session DNS cache)
                if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
                    if (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
                        $TmpDNSLookup{$Host}='*';
                        if ($Debug) { debug(" No need of reverse DNS lookup for $Host, skipped at user request.",4); }
                    }
                    else {
                        if ($ip == 4) {
                            # Create or not a new thread
                            if ($MaxNbOfThread) {
                                if (! $threadarray{$Host}) {    # No thread already launched for $Host
                                    # Wait for a free slot: each thread removes
                                    # its host from %threadarray when done
                                    while ((scalar keys %threadarray) >= $MaxNbOfThread) {
                                        if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
                                        sleep 1;
                                    }
                                    $threadarray{$Host}=1;      # Semaphore to tell thread for $Host is active
                                    # my $t = new Thread \&MakeDNSLookup, $Host;
                                    my $t = threads->create(sub { MakeDNSLookup($Host) });
                                    if (! $t) { error("Failed to create new thread"); }
                                    if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
                                    $t->detach();               # We don't need to keep return code
                                }
                                else {
                                    if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
                                }
                                # Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
                            } else {
                                &MakeDNSLookup($Host);
                                if ($Debug) { debug(" Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
                            }
                        }
                        elsif ($ip == 6) {
                            $TmpDNSLookup{$Host}='*';
                            if ($Debug) { debug(" Reverse DNS lookup for $Host not available for IPv6",4); }
                        }
                    }
                } else {
                    if ($Debug) { debug(" Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
                }
            }
            else {
                if ($Debug) { debug(" DNS lookup by static DNS cache file asked for $Host but not found.",4); }
            }
        }
        else {
            if ($Debug) { debug(" DNS lookup asked for $Host but this is not an IP address.",4); }
            # Remember the file, it is named in the warning printed at end of run
            $DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
        }
    }
    else {
        if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }    # IPv4
        elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }                        # IPv6
        if ($Debug) { debug(" No DNS lookup asked.",4); }
    }

    # Put record in record queue
    if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
    $QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};

    # Put record in host queue
    # If there is a host to resolve, we add line to queue with value of host to resolve
    # $Host is '' (no ip found) or is ip
    # '*' means that record can be printed as it is
    if ($DNSLookup==0) {
        $QueueHostsToResolve{$NbOfLinesParsed}='*';
    }
    if ($DNSLookup==1) {
        $QueueHostsToResolve{$NbOfLinesParsed}=$Host?$Host:'*';
    }
    if ($DNSLookup==2) {
        $QueueHostsToResolve{$NbOfLinesParsed}=$MyDNSTable{$Host}?$Host:'*';
    }

    # Print all records in head of queue that are ready
    &WriteRecordsReadyInQueue($logfilechosen);

} # End of processing new record. Loop on next one.

if ($Debug) { debug("End of processing log file(s)"); }

# Close all log files
# Only files still present in %LogFileToDo are closed here; the merge loop
# removes each file from this list when its end is reached.
foreach my $logfilenb (keys %LogFileToDo) {
    if ($Debug) { debug("Close log file number $logfilenb"); }
    close("LOG$logfilenb") || error("Command for pipe '$LogFileToDo{$logfilenb}' failed");
}

# Flush the queue until it is empty. A flush is always tried before looking
# at head of queue: a lookup can end at any time, and a head that is already
# resolved must be printed, not taken as a sign that nothing is left to do.
# We only sleep when head of queue still waits for its lookup.
while ( $QueueHostsToResolve{$QueueCursor} ) {
    # Print all records in head of queue that are ready
    &WriteRecordsReadyInQueue($logfilechosen);
    if (! $QueueHostsToResolve{$QueueCursor}) { last; }
    sleep 1;
}

# Waiting queue is empty
if ($MaxNbOfThread) {
    foreach my $t (threads->list()) {
        if ($Debug) { debug("Join thread $t"); }
        $t->join();
    }
}

# DNSLookup warning
# $DNSLookupAlreadyDone holds the name of a log file in which a record with
# no IP address was found while -dnslookup was asked
if ($DNSLookup==1 && $DNSLookupAlreadyDone) {
    warning("Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed.");
}

if ($Debug) {
    debug("Total nb of read lines: $NbOfLinesRead");
    debug("Total nb of parsed lines: $NbOfLinesParsed");
    debug("Total nb of DNS lookup asked: $NbOfDNSLookupAsked");
}

#if ($DNSCache) {
#   open(CACHE, ">$DNSCache") or die;
#   foreach (keys %TmpDNSLookup) {
#       $TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip";
#       print CACHE "0\t$_\t$TmpDNSLookup{$_}\n";
#   }
#   close CACHE;
#}

0;  # Do not remove this line
|