summary | refs | log | tree | commit | diff
path: root/scripts/apache-log-analyse.pl
diff options
context:
space:
mode:
author: Michael Meeks <michael.meeks@suse.com> 2011-10-20 11:47:21 +0100
committer: Michael Meeks <michael.meeks@suse.com> 2011-10-20 11:47:21 +0100
commit: 684447d1be8e14510fe07f02e08b43c30da28431 (patch)
tree: 8a819f5eedb4fe89bdbc59a2b15f7a5e4c7bcd4b /scripts/apache-log-analyse.pl
parent: f1489ac7bdb84c8a17dc2ceefe6c05f00490cff4 (diff)
start of script to analyse apache logs and generate ODF prettiness
Diffstat (limited to 'scripts/apache-log-analyse.pl')
-rwxr-xr-x scripts/apache-log-analyse.pl 53
1 files changed, 53 insertions, 0 deletions
diff --git a/scripts/apache-log-analyse.pl b/scripts/apache-log-analyse.pl
new file mode 100755
index 00000000..261c06cf
--- /dev/null
+++ b/scripts/apache-log-analyse.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl -w
+
+# Tally referrers from Apache access logs read from STDIN or the files in @ARGV.
+# Every line must start with a "<name>:<server>:<port> <dotted-digit address> - - "
+# prefix followed by the date, request, status, size and referrer; else we die.
+# Output: one "<referrer>\t<count>" line per distinct referrer, sorted by referrer.
+
+use strict;
+use warnings;
+
+my %bydate; # date -> page -> count (declared for later use; not filled in yet)
+my %referrers; # count of referrers by URL
+
+while (my $line = <>) {
+    # Fields are peeled off the front of $slice one at a time, left to right.
+    my $slice = $line;
+
+# wiki.documentfoundation.org.log:wiki.documentfoundation.org:80 190.69.122.160 - - [16/Oct/2011:08:17:18 +0200] "GET /Development/Easy_Hacks HTTP/1.1" 200 13665 "http://www.libreoffice.org/get-involved/" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1"
+    $slice =~ s/^[^:]+:([^:]+):\d+\s+([\d\.]+)\s+-\s+-\s+//
+        or die "invalid host in $slice";
+    my ($server, $host) = ($1, $2);
+
+# [11/Oct/2011:06:26:20 +0200] "GET /Development/Easy_Hacks_by_Difficulty/be HTTP/1.1" 404 5516 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+    $slice =~ s/^\[\s*([^\]]+)\s*\]\s+//
+        or die "invalid date in: $slice";
+    my $date = $1;
+
+# "GET /Development/Easy_Hacks HTTP/1.1" 200 13663 "-" "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)
+    $slice =~ s/^\"([^"]+)\"\s+\d+\s+\d+\s+//
+        or die "invalid page fetch: $slice";
+    my $page = $1;
+
+# "http://wiki.documentfoundation.org/Development/Easy_Hacks" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)"
+    $slice =~ s/^\"([^"]*)\"//
+        or die "invalid referrer: $slice";
+    my $referrer = $1;
+    $referrer = '' if ($referrer eq '-'); # a logged "-" is counted as the empty referrer
+# printf "line remainder $slice\n";
+
+    # cleanup page: drop the leading GET and the trailing protocol token
+    $page =~ s/^GET\s+//;
+    # Was 's*HTTP' (literal "s", not whitespace); one pattern now covers "HTTP" and "HTTP/1.1".
+    $page =~ s/\s+HTTP(?:\/[\d\.]*)?\s*$//;
+
+    $referrers{$referrer}++;
+
+# print "server '$server' -> host '$host' -> page '$page' -> referrer '$referrer'\n";
+}
+
+# Sorted so that repeated runs over the same logs produce identical output.
+for my $ref (sort keys %referrers) {
+    print "$ref\t" . $referrers{$ref} . "\n";
+}