#!/usr/bin/perl
#-----------------------------------------------------------------------------
# Export lib data values to text files, to allow using the AWStats robots,
# os, browsers and search_engines databases with other log analyzers
#-----------------------------------------------------------------------------
# $Revision: 1.4 $ - $Author: eldy $ - $Date: 2003/12/05 23:53:37 $

#use warnings;      # Must be used in test mode only. This reduces process speed a little
#use diagnostics;   # Must be used in test mode only. This reduces process speed a lot
# Symbolic references are needed by MAIN to reach the
# @RobotsSearchIDOrder_* / @SearchEnginesSearchIDOrder_* arrays by name.
use strict;no strict "refs";



#-----------------------------------------------------------------------------
# Defines
#-----------------------------------------------------------------------------
# $REVISION and $VERSION are package variables (declared with "use vars"), so
# they must be assigned without "my": a lexical of the same name would shadow
# them and leave the package variables undefined.
use vars qw/ $REVISION $VERSION /;
$REVISION='$Revision: 1.4 $';
# Keep only the revision number; fall back to '' if the CVS keyword is not expanded.
$REVISION=($REVISION =~ /\s(.*)\s/) ? $1 : '';
$VERSION="5.1 (build $REVISION)";

# ---------- Init variables -------
# Constants
use vars qw/
$DEBUGFORCED
/;
$DEBUGFORCED=0;     # Force debug level to log lesser level into debug.log file (Keep this value to 0)
# Running variables
use vars qw/
$DIR $PROG $Extension
$Debug
$DebugResetDone
/;
$DIR=$PROG=$Extension='';
$Debug=0;
$DebugResetDone=0;
# Detection levels. NOTE(review): not read anywhere in the visible part of this
# script; presumably kept for the lib files or for parity with awstats.pl - confirm.
use vars qw/
$LevelForRobotsDetection $LevelForBrowsersDetection $LevelForOSDetection $LevelForRefererAnalyze
$LevelForSearchEnginesDetection $LevelForKeywordsDetection
/;
($LevelForRobotsDetection, $LevelForBrowsersDetection, $LevelForOSDetection, $LevelForRefererAnalyze,
$LevelForSearchEnginesDetection, $LevelForKeywordsDetection)=
(2,1,1,1,1,1);
# Configuration-style variables, all initialised to the empty string.
use vars qw/
$DirLock $DirCgi $DirData $DirIcons $DirLang $AWScript $ArchiveFileName
$AllowAccessFromWebToFollowingIPAddresses $HTMLHeadSection $HTMLEndSection $LinksToWhoIs $LinksToIPWhoIs
$LogFile $LogFormat $LogSeparator $Logo $LogoLink $StyleSheet $WrapperScript $SiteDomain
/;
($DirLock, $DirCgi, $DirData, $DirIcons, $DirLang, $AWScript, $ArchiveFileName,
$AllowAccessFromWebToFollowingIPAddresses, $HTMLHeadSection, $HTMLEndSection, $LinksToWhoIs, $LinksToIPWhoIs,
$LogFile, $LogFormat, $LogSeparator, $Logo, $LogoLink, $StyleSheet, $WrapperScript, $SiteDomain)=
("","","","","","","","","","","","","","","","","","","","");
# Command line state: joined arguments, lib file to export, output format.
use vars qw/
$QueryString $LibToExport $ExportFormat
/;
($QueryString, $LibToExport, $ExportFormat)=
('','','');
# ---------- Init arrays --------
# The *_list1/_list2/_listgen arrays are merged by MAIN into
# @RobotsSearchIDOrder and @SearchEnginesSearchIDOrder.
use vars qw/
@RobotsSearchIDOrder_list1 @RobotsSearchIDOrder_list2 @RobotsSearchIDOrder_listgen
@SearchEnginesSearchIDOrder_list1 @SearchEnginesSearchIDOrder_list2 @SearchEnginesSearchIDOrder_listgen
@BrowsersSearchIDOrder @OSSearchIDOrder @WordsToExtractSearchUrl @WordsToCleanSearchUrl
@WormsSearchIDOrder
@RobotsSearchIDOrder @SearchEnginesSearchIDOrder
/;
@RobotsSearchIDOrder = @SearchEnginesSearchIDOrder = ();
# ---------- Init hash arrays --------
use vars qw/
%BrowsersHashIDLib %BrowsersHashIcon %BrowsersHereAreGrabbers
%DomainsHashIDLib
%MimeHashLib %MimeHashIcon %MimeHashFamily
%OSHashID %OSHashLib
%RobotsHashIDLib
%SearchEnginesHashID %SearchEnginesHashLib %SearchEnginesKnownUrl %NotSearchEnginesKeys
%WormsHashID %WormsHashLib
/;



#-----------------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------------

#------------------------------------------------------------------------------
# Function:     Write error message and exit
# Parameters:   $message $secondmessage $thirdmessage $donotshowsetupinfo
#               Only $message is printed. $secondmessage and $thirdmessage are
#               only added to the debug trace. $donotshowsetupinfo is accepted
#               but not used.
# Input:        $Debug
# Output:       $message followed by a newline on the selected output handle
# Return:       None (the process exits with status 1)
#------------------------------------------------------------------------------
sub error {
    my ($message, $secondmessage, $thirdmessage, $donotshowsetupinfo) = @_;
    # Any false value (undef, "", 0) is normalised as the original defaults did.
    $message            ||= "";
    $secondmessage      ||= "";
    $thirdmessage       ||= "";
    $donotshowsetupinfo ||= 0;

    debug("$message $secondmessage $thirdmessage",1) if $Debug;

    print "$message\n";
    exit 1;
}

#------------------------------------------------------------------------------
# Function:     Write debug message (this function does NOT exit)
# Parameters:   $string $level
#               $string = text of the message
#               $level  = level of the message (1 when omitted or false)
# Input:        $Debug = required level   $DEBUGFORCED = required level forced
#               $DebugResetDone = true once debug.log permissions were set
# Output:       Message appended to debug.log when $level <= $DEBUGFORCED,
#               message printed on the selected output handle when $level <= $Debug
# Return:       None
#------------------------------------------------------------------------------
sub debug {
    my $debugstring = $_[0];
    my $level = $_[1] || 1;

    if ($level <= $DEBUGFORCED) {
        if (! $DebugResetDone) {
            # NOTE(review): the previous code opened debug.log in read mode and
            # closed it here, which never truncated the file. If a real reset was
            # intended, truncate the file here instead - confirm.
            chmod 0666,"debug.log";
            $DebugResetDone=1;
        }
        # Best effort: when the log file can not be opened the message is
        # skipped instead of being printed on a closed file handle.
        if (open(my $logfh, '>>', "debug.log")) {
            print {$logfh} localtime(time)." - $$ - DEBUG $level - $debugstring\n";
            close $logfh;
        }
    }

    if ($level <= $Debug) {
        print localtime(time)." - DEBUG $level - $debugstring\n";
    }
    return;
}


#------------------------------------------------------------------------------
# Function:     Load the reference databases
# Parameters:   $lib = lib file to load, with or without a directory part
# Input:        $DIR $Debug
# Output:       The lib file is loaded with require, so the arrays and hash
#               tables it assigns are defined
# Return:       None (error() is called, which exits, when the file is not found)
#------------------------------------------------------------------------------
sub Read_Ref_Data {
    # Check lib files in common possible directories :
    # Directory given with the lib file name
    # Windows :                     "${DIR}lib" (lib in same dir than awstats.pl)
    # Debian package :              "/usr/share/awstats/lib"
    # Other possible directories :  "./lib"
    my $lib=shift;
    my $dir=$lib;
    $lib =~ s/^.*[\\\/]//;                          # Keep file name only
    $dir =~ s/[^\\\/]+$//; $dir =~ s/[\\\/]+$//;    # Keep directory only, no trailing separator
    debug("Lib: $lib, Dir: $dir");
    # "${DIR}lib" must be interpolated; the former "{DIR}lib" was a literal
    # string, so the directory of the script was never searched.
    my @PossibleLibDir=("$dir","${DIR}lib","/usr/share/awstats/lib","./lib");

    # Loaded inside the sub so this block does not depend on a file level "use".
    require File::Spec;

    my %FilePath=();
    my @FileListToLoad=();
    push @FileListToLoad, "$lib";
    foreach my $file (@FileListToLoad) {
        foreach my $libdir (@PossibleLibDir) {
            my $searchdir=$libdir;
            if ($searchdir && $searchdir !~ /[\/\\]$/) { $searchdir .= "/"; }
            # Only non empty files are accepted (-s)
            next if (! -s "${searchdir}${file}");
            $FilePath{$file}="${searchdir}${file}";
            if ($Debug) { debug("Call to Read_Ref_Data [FilePath{$file}=\"$FilePath{$file}\"]"); }
            # require only uses a path directly when it is absolute or starts
            # with ./ or ../ ; any other relative path is searched in @INC, which
            # no longer contains "." since Perl 5.26. So require the absolute path.
            require File::Spec->rel2abs($FilePath{$file});
            last;                                   # First match wins
        }
        if (! $FilePath{$file}) {
            error("Error: Can't read file \"$file\".\nCheck if file is in ".($PossibleLibDir[0])." directory and is readable.");
        }
    }
    return;
}

#------------------------------------------------------------------------------
# Function:     Unregex a string (every backslash is removed)
# Parameters:   String
# Input:        -
# Output:       -
# Return:       Unregexed string
#------------------------------------------------------------------------------
sub unregex {
    # Work on a copy so the caller's value is left untouched.
    (my $plain = shift) =~ s{\\}{}g;
    return $plain;
}

#------------------------------------------------------------------------------
# Function:     Unregex a keyword code extractor
# Parameters:   String          alternatives separated by "|"
#               firstoneonly    when true, only the first alternative is returned
# Input:        -
# Output:       -
# Return:       Alternatives without parenthesis, joined with "," (or the first
#               alternative only when firstoneonly is true)
#------------------------------------------------------------------------------
sub unregexkeywordcode {
    my ($ss, $firstoneonly) = @_;
    my @codes = ();
    foreach my $alternative (split(/\|/,$ss)) {
        # Drop the grouping parenthesis of the regex
        (my $code = $alternative) =~ tr/()//d;
        push @codes, $code;
    }
    return $codes[0] if $firstoneonly;
    return join(',',@codes);
}



#------------------------------------------------------------------------------
# MAIN
#------------------------------------------------------------------------------
# Split $0 into directory ($DIR), program name ($PROG) and extension
# ($Extension). $1 is only read when the extension substitution succeeded;
# otherwise it would still hold the capture of the previous match (the whole
# file name).
($DIR=$0) =~ s/([^\/\\]*)$//;
$PROG=$1;
$Extension=($PROG =~ s/\.([^\.]*)$//) ? $1 : '';
my $scriptname=($Extension ne '') ? "$PROG.$Extension" : $PROG;

my @AllowedArgs=('-lib','-exportformat','-debug');

# Join the command line arguments into a query string "name=value&name=value"
$QueryString="";
for my $argnum (0..@ARGV-1) {
    # TODO Check if ARGV is an AllowedArg
    if ($argnum > 0) { $QueryString .= "&"; }
    my $NewLinkParams=$ARGV[$argnum]; $NewLinkParams =~ s/^-+//; $NewLinkParams =~ s/\s/%20/g;
    $QueryString .= "$NewLinkParams";
}
$ExportFormat="text";
if ($QueryString =~ /lib=([^\s&]+)/i)           { $LibToExport="$1"; }
if ($QueryString =~ /exportformat=([^\s&]+)/i)  { $ExportFormat="$1"; }
if ($QueryString =~ /debug=(\d+)/i)             { $Debug=$1; }

if ($Debug) {
    debug("$PROG - $VERSION - Perl $^X $]",1);
    debug("QUERY_STRING=$QueryString",2);
}

if (! $LibToExport || ! $ExportFormat) {
    print "----- $PROG $VERSION (c) Laurent Destailleur -----\n";
    print "$PROG is a tool to export AWStats lib (Robots, Os, Browsers, search\n";
    print "engines database) to text files. This allow you to use AWStats lib with some\n";
    print "other log analyzers (to enhance their capabilities or to make comparison).\n";
    print "$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software distributed\n";
    print "with a GNU General Public License (See LICENSE file for details).\n";
    print "\n";
    print "Syntax: $scriptname -lib=/awstatslibpath/libfile.pm [-exportformat=format]\n";
    print "\n";
    print "Where format can be:\n";
    print "  text (default)\n";
    print "  webalizer\n";
    print "  analog\n";
    print "\n";
    exit 2;
}

# An unknown format would otherwise produce no output at all without any message.
if ($ExportFormat !~ /\A(?:text|webalizer|analog)\z/) {
    error("Error: Unknown export format \"$ExportFormat\". Format must be 'text', 'webalizer' or 'analog'.");
}

Read_Ref_Data($LibToExport);


my $libisexportable=0;

# Export data
#------------

if ($LibToExport =~ /browsers/) {
    foreach my $key (@BrowsersSearchIDOrder) {
        if ($ExportFormat eq 'text') {
            print "$key\t$BrowsersHashIDLib{$key}\n";
        }
        elsif ($ExportFormat eq 'webalizer') {
            print "GroupAgent\t$key\n";
        }
        elsif ($ExportFormat eq 'analog') {
            print "Analog does not support self-defined browsers.\nUse 'text' export format if you want an export list of AWStats Browsers.\n";
            last;
        }
    }
    $libisexportable=1;
}

if ($LibToExport =~ /mime/) {
    if ($ExportFormat eq 'analog') {
        # For analog, the PAGEINCLUDE lines are written before the TYPEALIAS lines
        foreach my $key (sort keys %MimeHashFamily) {
            if ($MimeHashFamily{$key} =~ /(text|page|script|document)/) { print "PAGEINCLUDE *.$key\n"; }
        }
    }
    foreach my $key (sort keys %MimeHashFamily) {
        if ($ExportFormat eq 'text') {
            print "$key\t$MimeHashLib{$MimeHashFamily{$key}}\n";
        }
        elsif ($ExportFormat eq 'webalizer') {
            print "Webalizer does not support self-defined mime types.\nUse 'text' export format if you want an export list of AWStats Mime types.\n";
            last;
        }
        elsif ($ExportFormat eq 'analog') {
            print "TYPEALIAS .$key   \"$key [$MimeHashLib{$MimeHashFamily{$key}}]\"\n";
        }
    }
    $libisexportable=1;
}

if ($LibToExport =~ /operating_systems/) {
    # Every format stops at the first key: one message is printed, and only
    # when %OSHashLib is not empty.
    foreach my $key (sort keys %OSHashLib) {
        if ($ExportFormat eq 'text') {
            print "Feature not ready yet\n";
            last;
        }
        elsif ($ExportFormat eq 'webalizer') {
            print "Webalizer does not support self-defined added OS.\nUse 'text' export format if you want an export list of AWStats OS.\n";
            last;
        }
        elsif ($ExportFormat eq 'analog') {
            print "Analog does not support self-defined added OS.\nUse 'text' export format if you want an export list of AWStats OS.\n";
            last;
        }
    }
    $libisexportable=1;
}

if ($LibToExport =~ /robots/) {
    no strict 'refs';   # The source arrays are reached by name
    # Init RobotsSearchIDOrder required for update process
    foreach my $listname ('list1','list2','listgen') {
        push @RobotsSearchIDOrder,@{"RobotsSearchIDOrder_$listname"};
    }

    foreach my $key (@RobotsSearchIDOrder) {
        if ($ExportFormat eq 'text') {
            print "$key\t$RobotsHashIDLib{$key}\n";
        }
        elsif ($ExportFormat eq 'webalizer') {
            print "GroupAgent\t$key\n";
        }
        elsif ($ExportFormat eq 'analog') {
            print "ROBOTINCLUDE REGEXPI:$key\n";
        }
    }
    $libisexportable=1;
}

if ($LibToExport =~ /search_engines/) {
    no strict 'refs';   # The source arrays are reached by name
    # Init SearchEnginesIDOrder required for update process
    # ("listgen" is always added)
    foreach my $listname ('list1','list2','listgen') {
        push @SearchEnginesSearchIDOrder,@{"SearchEnginesSearchIDOrder_$listname"};
    }

    foreach my $key (@SearchEnginesSearchIDOrder) {
        if ($ExportFormat eq 'text') {
            print "$key\t$SearchEnginesKnownUrl{$SearchEnginesHashID{$key}}\t$SearchEnginesHashLib{$SearchEnginesHashID{$key}}\n";
        }
        elsif ($ExportFormat eq 'webalizer') {
            # Only the first keyword code is exported for this format
            my $urlkeywordsyntax=unregexkeywordcode($SearchEnginesKnownUrl{$SearchEnginesHashID{$key}},1);
            if (! $urlkeywordsyntax) { next; }      # This has no keywordextractcode
            my $newkey=unregex($key);
            if ($newkey =~ /[\[\]\(\)\|\?\*\+]/) { next; }  # This was a regex value that i can't clean
            print "SearchEngine\t$newkey\t$urlkeywordsyntax\n";
            print "GroupReferrer\t$newkey\t$SearchEnginesHashLib{$SearchEnginesHashID{$key}}\n";
        }
        elsif ($ExportFormat eq 'analog') {
            my $urlkeywordsyntax=$SearchEnginesKnownUrl{$SearchEnginesHashID{$key}};
            $urlkeywordsyntax=~s/=$//;              # Trailing "=" is removed for this format
            $urlkeywordsyntax=unregexkeywordcode($urlkeywordsyntax);
            if (! $urlkeywordsyntax) { next; }      # This has no keywordextractcode
            my $newkey=unregex($key);
            if ($newkey =~ /[\[\]\(\)\|\?\*\+]/) { next; }  # This was a regex value that i can't clean
            print "SEARCHENGINE http://*$newkey*/* $urlkeywordsyntax\n";
        }
    }
    $libisexportable=1;
}

if (! $libisexportable) {
    print "Export for AWStats lib '$LibToExport' is not supported in this tool version.\n";
}


0;  # Do not remove this line

|