#!/usr/bin/perl -w
#
################################################################################
# Script to retrieve data from the urchin.cgi engine and print a text-based
# report, which could then be emailed, converted to HTML, etc.
#
# To customize this script for your Urchin installation, edit the defaults below
# for $urchinpath, $profile, and $language.
#
#
# Usage: u5data_extractor.pl [--begin YYYYMMDD] [--end YYYYMMDD] [--help]
#                            [--language LA] [--max N] [--profile PROFILE]
#                            [--report RRRR] [--urchinpath PATH]
#
# Where:
#   '--begin YYYYMMDD' specifies the starting date (default: one week ago)
#   '--end YYYYMMDD' specifies the ending date (default: yesterday)
#   '--help' displays this message
#   '--language LA' specifies the language for the report.  Available
#       languages are: ch, en, fr, ge, it, ja, ko, po, sp, and sw
#   '--max N' is the maximum number of entries printed in the top 10 report
#       types (default is 10).
#   '--profile PROFILE' specifies the profile to retrieve data from.  The
#       default is specified at the beginning of this script
#   '--report RRRR' is the 4-digit number for the report (default is 1102)
#       If the requested report is not available, the script prints the
#       list of reports found in the profile's report set.
#   '--urchinpath PATH' specifies the path to the Urchin distribution.
#       Note that you can edit the script and set your path as a default
#
# Copyright (c) 2001,2002,2003,2004 Urchin Software Corporation
# $Revision: 1.6 $
#
# DISCLAIMER: This program is supplied AS-IS, with no warranties
#             implied or expressed.  Use at your own risk.
#
################################################################################

use strict;
use Getopt::Long;

#-------------------------------------------------------------------------------
# Define the path to the Urchin distribution and default values for profile
# name, language, report number, and maximum number of items to show in a list.
#
# Edit the following defaults to customize this script for your particular site.
#-------------------------------------------------------------------------------
my $urchinpath = "/usr/local/urchin5";   # Path to the Urchin distribution
my $profile    = "www.urchin.com";       # Name of the default profile
my $language   = "en";                   # Language (see usage for options)
my $report     = 1102;                   # Report number
my $max        = 10;                     # Number of Top 10 items to show

#-------------------------------------------------------------------------------
# Do NOT edit anything below this line.
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Calculate the default beginning and ending dates as YYYYMMDD integers.
#-------------------------------------------------------------------------------
my ($day, $month, $year);

# Default beginning date is one week ago
($day, $month, $year) = (localtime(time() - (7*86400)))[3,4,5];
my $begin = ($year + 1900)*10000 + ($month + 1)*100 + $day;

# Default ending date is yesterday
($day, $month, $year) = (localtime(time() - 86400))[3,4,5];
my $end = ($year + 1900)*10000 + ($month + 1)*100 + $day;

#-------------------------------------------------------------------------------
# Get the command line options and display help if requested
#-------------------------------------------------------------------------------
my $help = 0;
GetOptions('begin=i'      => \$begin,
           'end=i'        => \$end,
           'help'         => \$help,
           'language=s'   => \$language,
           'max=i'        => \$max,
           'profile=s'    => \$profile,
           'report=i'     => \$report,
           'urchinpath=s' => \$urchinpath);
if ($help) {
    usage();
    exit;
}

#-------------------------------------------------------------------------------
# Check for the urchin path, uconf-driver, and urchin.cgi and display help if
# these do not exist.
#-------------------------------------------------------------------------------
if (! -e "$urchinpath") {
    print "\nERROR: The Urchin path, $urchinpath, does not exist. \n"
        . "Please specify the path to the Urchin distribution on the command line, "
        . "or edit the script and change the default for the \$urchinpath variable. "
        . "Exiting.\n\n";
    usage();
    exit;
}
elsif (! -e "$urchinpath/util/uconf-driver") {
    print "\nERROR: $urchinpath/util/uconf-driver does not exist. "
        . "Please verify that your Urchin path is set correctly and is pointing "
        . "to a full Urchin distribution. Exiting.\n\n";
    usage();
    exit;
}
elsif (! -e "$urchinpath/bin/urchin.cgi") {
    print "\nERROR: $urchinpath/bin/urchin.cgi does not exist. "
        . "Please verify that your Urchin path is set correctly and is pointing "
        . "to a full Urchin distribution. Exiting.\n\n";
    usage();
    exit;
}

#-------------------------------------------------------------------------------
# Read Urchin configuration for report id and profile type
#-------------------------------------------------------------------------------
# Run uconf-driver through a list-form pipe open so the profile name is passed
# as a single argument and never interpreted by a shell.
# NOTE(review): list-form pipe open needs Perl 5.8+ (5.22+ on Windows).
my $uconf = "$urchinpath/util/uconf-driver";
open(my $uconf_fh, '-|', $uconf, 'table=profile', 'action=seek', "name=$profile")
    or die "Failed to run $uconf: $!\n";
my $result = do { local $/; <$uconf_fh> };
close($uconf_fh);
$result = '' unless defined $result;

# Capture in list context: a failed match yields undef instead of silently
# reusing $1 from an earlier successful match.
my ($rid) = $result =~ /recnum=(\d+)/;
if ((!defined $rid) || ($rid < 1)) {
    die "Profile \"$profile\" does not exist\n";
}

my ($profiletype) = $result =~ /cr_profiletype="(\S+)"/;
if ((!defined $profiletype) || ($profiletype eq "")) {
    $profiletype = "Standard_Website";
}

my ($reportset) = $result =~ /cs_reportset="(\S+)"/;
if ((!defined $reportset) || ($reportset eq "")) {
    $reportset = "Standard";
}
$reportset .= ".rs";

#-------------------------------------------------------------------------------
# Read the reportset for a list of available reports
#-------------------------------------------------------------------------------
my (%name, %ifield, %type);
my $rrs = "$urchinpath/lib/reporting/profiletypes/$profiletype/$reportset";
my $crs = "$urchinpath/lib/custom/profiletypes/$profiletype/$reportset";

# Try the stock reportset first, then fall back to the custom one.
my $rs_fh;
open($rs_fh, '<', $rrs)
    or open($rs_fh, '<', $crs)
    or die "Failed to open reportset $!";

while (my $entry = <$rs_fh>) {
    chomp $entry;
    next if $entry =~ /^#/;             # Skip commented entries

    #---------------------------------------------------------------------------
    # Separate the reportset parts.  Fields are whitespace separated and may be
    # wrapped in double quotes.  @parts is fresh for every line so fields from
    # a longer previous line cannot leak into this one.
    #---------------------------------------------------------------------------
    my @parts;
    my $line = $entry;
    while (length $line) {
        if ($line =~ /^"(.*?)"\s+(.*)/) {
            push @parts, $1;
            $line = $2;
        }
        elsif ($line =~ /^"(.*?)"\s*$/) {
            push @parts, $1;
            $line = "";
        }
        elsif ($line =~ /^(.*?)\s+(.*)/) {
            push @parts, $1;
            $line = $2;
        }
        elsif ($line =~ /^(.*?)\s*$/) {
            push @parts, $1;
            $line = "";
        }
        else {
            last;                       # Nothing matched; avoid looping forever
        }
    }

    #---------------------------------------------------------------------------
    # Verify the entry is a report.  Store the name, ifield and type
    #---------------------------------------------------------------------------
    next unless defined $parts[1] && $parts[1] =~ /report/i;
    $name{$parts[0]}   = $parts[3];
    $ifield{$parts[0]} = $parts[7];
    $type{$parts[0]}   = $parts[15];
}
close($rs_fh);

#-------------------------------------------------------------------------------
# Exit if report is not available
#-------------------------------------------------------------------------------
if (!$name{$report}) {
    print "Report $report is not available. \nAvailable reports are:\n";
    foreach my $rep (sort (keys(%name))) {
        print " $rep $name{$rep}\n";
    }
    exit;
}

#-------------------------------------------------------------------------------
# Determine the number of columns for this report
#-------------------------------------------------------------------------------
my @ifields;
my $columncount = 1;
if ($ifield{$report}) {
    @ifields = split(/!/, $ifield{$report});
    $columncount = @ifields;
}
else {
    $ifield{$report} = "-";
}

#-------------------------------------------------------------------------------
# Create query string, retrieve data, and print out values
#-------------------------------------------------------------------------------
my $cgi = "$urchinpath/bin/urchin.cgi";
$ENV{REQUEST_METHOD} = "GET";
$ENV{QUERY_STRING} = "vid=$report&bd=$begin&ed=$end&v=$max&rid=$rid&lang=$language&x=1&user=(admin)";

my $count = 0;
my $is_top_report = (defined $type{$report} && $type{$report} eq "TOP");

open(my $cgi_fh, '-|', $cgi) or die "Failed to open urchin.cgi: $!";
while (my $row = <$cgi_fh>) {
    chomp $row;

    #---------------------------------------------------------------------------
    # Skip Content-type and empty lines and print comment lines
    #---------------------------------------------------------------------------
    next if $row =~ /^Content-type:/;
    next if $row =~ /^$/;
    if ($row =~ /^##/) {
        print "################################################################################\n";
        next;
    }
    if ($row =~ /^#/) {
        print "$row\n";
        next;
    }

    #---------------------------------------------------------------------------
    # Only print up to $max entries for TOP 10 type reports
    #---------------------------------------------------------------------------
    if ($is_top_report) {
        $count++;
        if ($count > $max) { exit; }
    }

    #---------------------------------------------------------------------------
    # Format and print the report data
    #---------------------------------------------------------------------------
    my @cols = split(/\t/, $row);
    printf(($columncount == 1 ? "%-66s " : "%-38s "), $cols[0]);

    if ($name{$report} =~ /summary$/i) {
        if ($cols[0] =~ /(visitors|sessions|pageviews|hits|transaction|revenue)/i) {
            $cols[1] = formatNumber($cols[1]);
        }
        if ($cols[0] =~ /bytes/i) {
            $cols[1] = formatBytes($cols[1]);
        }
        if ($cols[0] =~ /length/i) {
            $cols[1] = formatTime($cols[1]);
        }
        printf("%13s\n", $cols[1]);
    }
    elsif ($name{$report} =~ /^overall results$/i) {
        if ($cols[0] =~ /(sessions|pageviews|transactions|new leads|clicks|goals|impressions)/i) {
            $cols[1] = formatNumber($cols[1]);
        }
        if ($cols[0] =~ /(revenue|cost)/i) {
            $cols[1] = formatNumber($cols[1]);
        }
        printf("%13s\n", $cols[1]);
    }
    else {
        # Column $i of the row is described by $ifields[$i-1].  Stop at the
        # last value actually present in the row rather than reading past it.
        for (my $i = 1; ($i <= $columncount) && ($i < @cols); $i++) {
            my $field = defined $ifields[$i-1] ? $ifields[$i-1] : '';
            if ($field =~ /^(visitors|sessions|pageviews|hits|trans|items|newleads|clicks|goals|impress|ratio|pct)$/i) {
                $cols[$i] = formatNumber($cols[$i]);
            }
            if ($field =~ /^(bytes)$/i) {
                $cols[$i] = formatBytes($cols[$i]);
            }
            if ($field =~ /^(time)$/i) {
                $cols[$i] = formatTime($cols[$i]);
            }
            if ($field =~ /^(revenue|cost)$/i) {
                $cols[$i] /= 100;
                $cols[$i] = formatNumber($cols[$i]);
            }
            printf("%13s ", $cols[$i]);
        }
        printf("\n");
    }
}
close($cgi_fh) or die "Failed to close urchin.cgi: $!";
exit;

#-------------------------------------------------------------------------------
# Subroutines
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Changes a number into the proper byte format.
#
# Takes a byte count and returns it scaled by powers of 1024 with two decimal
# places and a unit suffix (B, KB, MB, GB or TB), e.g. 1536 -> "1.50 KB".
# Values beyond the TB range stay expressed in TB.  An undefined argument is
# treated as 0.
#-------------------------------------------------------------------------------
sub formatBytes {
    my ($bytes) = @_;
    $bytes = 0 unless defined $bytes;

    my @units = ("B", "KB", "MB", "GB", "TB");
    my $unit  = 0;
    while (($bytes >= 1024) && ($unit < $#units)) {
        $bytes /= 1024;
        $unit++;
    }
    return sprintf("%4.2f %2s", $bytes, $units[$unit]);
}

#-------------------------------------------------------------------------------
# Formats a number with commas to separate the thousands.
# Also displays non-integer numbers with 2 decimal places.
#
# Takes a number and returns a string.  The value is rounded to two decimals
# first, then the integer part is grouped in threes.  Whole numbers of 1 or
# more are returned without decimals ("1,000"); zero and fractional values
# keep two decimals ("0.00", "12.50").  An undefined argument is treated as 0.
#-------------------------------------------------------------------------------
sub formatNumber {
    my ($number) = @_;
    $number = 0 unless defined $number;

    # Round once, up front, so a fraction such as .996 carries into the
    # integer part instead of being printed as ".100".
    my $rounded = sprintf("%.2f", abs($number));
    my $sign    = (($number < 0) && ($rounded != 0)) ? "-" : "";
    my ($whole, $cents) = split(/\./, $rounded);

    # Insert a comma in front of every trailing group of three digits.
    1 while $whole =~ s/^(\d+)(\d{3})/$1,$2/;

    if (($cents eq "00") && ($whole ne "0")) {
        return "$sign$whole";
    }
    return "$sign$whole.$cents";
}

#-------------------------------------------------------------------------------
# Formats a number into a XX:XX:XX time format.
#
# Takes a duration in seconds and returns "HH:MM:SS".  Hours are not wrapped
# at 24, so 86400 yields "24:00:00".  An undefined argument is treated as 0.
#-------------------------------------------------------------------------------
sub formatTime {
    my ($seconds) = @_;
    $seconds = 0 unless defined $seconds;

    my $hours   = int($seconds / 3600);
    my $minutes = int($seconds / 60) % 60;
    my $secs    = $seconds % 60;
    return sprintf("%02d:%02d:%02d", $hours, $minutes, $secs);
}

#-------------------------------------------------------------------------------
# Displays the usage for this script.
#
# Prints the help text to standard output.  Reads the file-level $profile
# variable to show the profile currently in effect.
#-------------------------------------------------------------------------------
sub usage {
    print <<"END_USAGE";
Usage: $0 [--begin YYYYMMDD] [--end YYYYMMDD] [--help] \\
       [--language LA] [--max N] [--profile PROFILE] [--report RRRR] \\
       [--urchinpath PATH]

Where:
  '--begin YYYYMMDD' specifies the starting date (default is one week ago),
  '--end YYYYMMDD' specifies the ending date (default is yesterday),
  '--help' displays this message
  '--language LA' specifies the language for the report.  Available
      languages are: ch, en, fr, ge, it, ja, ko, po, sp, and sw
  '--max N' is the maximum number of entries printed in the top 10 report
      types (default is 10).
  '--profile PROFILE' specifies the profile to retrieve data from.  The
      default is $profile.
  '--report RRRR' is the 4-digit number for the report (default is 1102)
      Available reports are defined in the report sets.
  '--urchinpath PATH' specifies the path to the Urchin distribution.
      Note that you can edit the script and set your path as a default

END_USAGE
}