diff options
author | Michael Meeks <michael.meeks@suse.com> | 2011-10-20 11:47:21 +0100 |
---|---|---|
committer | Michael Meeks <michael.meeks@suse.com> | 2011-10-20 11:47:21 +0100 |
commit | 684447d1be8e14510fe07f02e08b43c30da28431 (patch) | |
tree | 8a819f5eedb4fe89bdbc59a2b15f7a5e4c7bcd4b /scripts/apache-log-analyse.pl | |
parent | f1489ac7bdb84c8a17dc2ceefe6c05f00490cff4 (diff) |
start of script to analyse apache logs and generate ODF prettiness
Diffstat (limited to 'scripts/apache-log-analyse.pl')
-rwxr-xr-x | scripts/apache-log-analyse.pl | 53 |
1 files changed, 53 insertions, 0 deletions
#!/usr/bin/perl -w
#
# scripts/apache-log-analyse.pl
#
# Reads Apache access-log lines from the files named on the command line (or
# from STDIN when none are given) and prints one "referrer<TAB>count" row per
# distinct referrer, most frequent first.
#
# Each input line is expected to look like the sample below: a leading
# "<name>:" field, then "<server>:<port>", the client address, two "-"
# placeholders, a bracketed timestamp, the quoted request, status, size, the
# quoted referrer and the quoted user agent.
#
# wiki.documentfoundation.org.log:wiki.documentfoundation.org:80 190.69.122.160 - - [16/Oct/2011:08:17:18 +0200] "GET /Development/Easy_Hacks HTTP/1.1" 200 13665 "http://www.libreoffice.org/get-involved/" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1"
#
# Any line that does not match this layout aborts the run with a message
# naming the part that failed to parse.

use strict;
use warnings;

my %bydate;    # date -> page -> count (declared for later use; nothing fills it yet)
my %referrers; # count of referrers by URL

# Reduce a raw request string ("GET /path HTTP/1.1") to just the path.
# Only a leading "GET" is removed; other methods are left in place.
sub _clean_page {
    my ($page) = @_;

    $page =~ s/^GET\s+//;
    # Drop the trailing protocol token, with or without a version number.
    # The whitespace before "HTTP" is required so that a path which merely
    # ends in the letters "HTTP" is not truncated.
    $page =~ s/\s+HTTP(?:\/[\d\.]*)?\s*$//;

    return $page;
}

# Split one log line into its fields.
#
# Takes the raw line and returns the list
#   ($server, $host, $date, $page, $referrer)
# where $page has been passed through _clean_page() and a referrer logged as
# "-" is returned as the empty string.
#
# Dies with "invalid host in ...", "invalid date in: ...",
# "invalid page fetch: ..." or "invalid referrer: ..." followed by the
# not-yet-consumed remainder of the line when a field cannot be parsed.
sub _parse_log_line {
    my ($line) = @_;
    my $slice = $line;    # consumed from the left, one field group at a time

    # <name>:<server>:<port> <client-ip> - -
    $slice =~ s/^[^:]+:([^:]+):\d+\s+([\d\.]+)\s+-\s+-\s+//
        or die "invalid host in $slice";
    my ($server, $host) = ($1, $2);

    # [11/Oct/2011:06:26:20 +0200]
    $slice =~ s/^\[\s*([^\]]+)\s*\]\s+//
        or die "invalid date in: $slice";
    my $date = $1;

    # "GET /Development/Easy_Hacks HTTP/1.1" 200 13663
    # The size may be "-" instead of a number: Apache's %b writes "-" when
    # no response body was sent.
    $slice =~ s/^\"([^"]+)\"\s+\d+\s+(?:\d+|-)\s+//
        or die "invalid page fetch: $slice";
    my $page = $1;

    # "http://wiki.documentfoundation.org/Development/Easy_Hacks"
    $slice =~ s/^\"([^"]*)\"//
        or die "invalid referrer: $slice";
    my $referrer = $1;
    $referrer = '' if ($referrer eq '-');
#   printf "line remainder $slice\n";

    return ($server, $host, $date, _clean_page($page), $referrer);
}

# Entry point: tally referrers over all input lines, then print the table.
sub main {
    while (my $line = <>) {
        my ($server, $host, $date, $page, $referrer) = _parse_log_line($line);

        $referrers{$referrer}++;

#       print "server '$server' -> host '$host' -> page '$page' -> referrer '$referrer'\n";
    }

    # Sort by descending count, then by URL, so the report is stable between
    # runs (plain hash order is not).
    my @ordered = sort {
        $referrers{$b} <=> $referrers{$a} or $a cmp $b
    } keys %referrers;

    for my $ref (@ordered) {
        print "$ref\t" . $referrers{$ref} . "\n";
    }
}

# Run only when executed as a script, so the helpers can be loaded on their
# own (e.g. via require) without consuming input.
main() unless caller;

1;