#!/usr/bin/perl -w
#
################################################################################
# Script to retrieve data from the urchin.cgi engine and print a text-based
# report, which could then be emailed, converted to HTML, etc.
#
# To customize this script for your Urchin installation, edit the defaults below
# for $urchinpath, $profile, and $language.
#
# Usage: u6data_extractor.pl [--begin YYYYMMDD] [--end YYYYMMDD] [--help]
#                            [--language la-LA] [--max N] [--profile PROFILE]
#                            [--report RRRR] [--urchinpath PATH]
#
# Where:
#   '--begin YYYYMMDD'  specifies the starting date (default: one week ago)
#   '--end YYYYMMDD'    specifies the ending date (default: yesterday)
#   '--help'            displays the usage message
#   '--language la-LA'  specifies the language for the report. Available
#                       languages are: de-DE en-US es-ES fr-FR it-IT ja-JP
#                       ko-KR nl-NL pt-BR zh-CN zh-TW
#   '--max N'           is the maximum number of entries printed in the top 10
#                       report types (default is 10).
#   '--profile PROFILE' specifies the profile to retrieve data from. The
#                       default is specified at the beginning of this script
#   '--report RRRR'     is the 4-digit number for the report (default is 1501).
#                       Available reports are defined in the report sets; an
#                       unknown report number prints the list of valid ones.
#   '--urchinpath PATH' specifies the path to the Urchin distribution.
#                       Note that you can edit the script and set your path as
#                       a default
#
# Copyright (c) 2001,2002,2003,2004 Urchin Software Corporation
# $Revision: 1.6 $
# 2008, Rewritten, $Revision: 1.2 $
#
# DISCLAIMER: This program is supplied AS-IS, with no warranties
# implied or expressed. Use at your own risk.
#
################################################################################

use strict;
use warnings;
use Getopt::Long;
use DBI();

#-------------------------------------------------------------------------------
# Define the path to the Urchin distribution and default values for profile
# name, language, report number, and maximum number of items to show in a list.
#
# Edit the following defaults to customize this script for your particular site.
#-------------------------------------------------------------------------------
my $urchinpath = "/usr/local/urchin";    # Path to the Urchin distribution
my $profile    = "www.urchin.com";       # Name of the default profile
my $language   = "en-US";                # Language (see usage for options)
my $report     = 1501;                   # Report number
my $max        = 10;                     # Number of Top 10 items to show

#-------------------------------------------------------------------------------
# Do NOT edit anything below this line.
#-------------------------------------------------------------------------------

# Remember the configured defaults so usage() can show them even after the
# command line has overridden the working values.
my $def_profile = $profile;
my $def_report  = $report;

#-------------------------------------------------------------------------------
# Calculate the default beginning and ending dates (as YYYYMMDD integers).
#-------------------------------------------------------------------------------
my ( $day, $month, $year, $begin, $end );

# Default beginning date is one week ago
( $day, $month, $year ) = ( localtime( time() - ( 7 * 86400 ) ) )[ 3, 4, 5 ];
$begin = ( $year + 1900 ) * 10000 + ( $month + 1 ) * 100 + $day;

# Default ending date is yesterday
( $day, $month, $year ) = ( localtime( time() - 86400 ) )[ 3, 4, 5 ];
$end = ( $year + 1900 ) * 10000 + ( $month + 1 ) * 100 + $day;

#-------------------------------------------------------------------------------
# Get the command line options and display help if requested
#-------------------------------------------------------------------------------
my $help = 0;
GetOptions(
    'begin=i'      => \$begin,
    'end=i'        => \$end,
    'help'         => \$help,
    'language=s'   => \$language,
    'max=i'        => \$max,
    'profile=s'    => \$profile,
    'report=i'     => \$report,
    'urchinpath=s' => \$urchinpath
) or do {

    # Getopt::Long has already printed what was wrong with the arguments.
    usage();
    exit 1;
};

if ( $max < 0 ) {
    die "Error: --max option should be positive integer, but negative vaules has been entered '"
      . $max . "'\n";
}
elsif ( $max == 0 ) {
    die "Error: --max option values should be grater than zero!\n";
}

if ($help) {
    usage();
    exit;
}

#-------------------------------------------------------------------------------
# Check for the urchin path and urchin.cgi and display help if these do not
# exist.
#-------------------------------------------------------------------------------
if ( !-e "$urchinpath" ) {
    print "\nERROR: The Urchin path, $urchinpath, does not exist. Please specify the path to the Urchin distribution on the command line, or edit the script and change the default for the \$urchinpath variable. Exiting.\n\n";
    usage();
    exit;
}
elsif ( !-e "$urchinpath/bin/urchin.cgi" ) {
    print "\nERROR: $urchinpath/bin/urchin.cgi does not exist. Please verify that your Urchin path is set correctly and is pointing to a full Urchin distribution. Exiting.\n\n";
    usage();
    exit;
}

#-------------------------------------------------------------------------------
# Read Urchin configuration for report id and profile type
#-------------------------------------------------------------------------------
my $urchinconfig = $urchinpath . "/etc/urchin.conf";    # location of urchin.conf
my @lang_dictionary;
my %config;

parseUrchinConf( $urchinconfig, \%config );
readDictionary(
    $urchinpath . '/lib/reporting/languages/' . $language . '.txt',
    \@lang_dictionary
);

my $dbi = $config{'SQLType'};
if ( !defined $dbi ) {
    die "Error: SQLType is not set in $urchinconfig\n";
}

# urchin.conf says "postgresql"; the DBI driver for it is named "Pg".
if ( $dbi eq "postgresql" ) {
    $dbi = "Pg";
}

my $dsn = sprintf( "DBI:%s:database=%s;host=%s",
    $dbi, $config{'SQLDatabase'}, $config{'SQLServer'} );
my $dbh = DBI->connect(
    $dsn,
    $config{'SQLUsername'},
    $config{'SQLPassword'},
    { 'RaiseError' => 1 }
);

# The profile name comes from the command line, so bind it as a placeholder
# instead of splicing it into the SQL text.
my $sth = $dbh->prepare("SELECT * FROM uprofiles WHERE ucpr_name = ?");
if ( !$sth ) {
    die "Error:" . $dbh->errstr . "\n";
}
if ( !$sth->execute($profile) ) {
    die "Error:" . $sth->errstr . "\n";
}

my ( $rid, $profiletype, $reportset );
my $ref = $sth->fetchrow_hashref();
if ($ref) {

    # NOTE(review): the id column is read as 'uspr_id' while the other columns
    # use the 'ucpr_' prefix - confirm against the uprofiles schema.
    $rid         = $ref->{'uspr_id'};
    $profiletype = $ref->{'ucpr_type'};

    # A report set whose name contains "Standard" is stored on disk as
    # "default".
    if ( $ref->{'ucpr_report_set'} =~ /Standard/ ) {
        $reportset = 'default';
    }
    else {
        $reportset = $ref->{'ucpr_report_set'};
    }
    $reportset .= ".rs2";
    $sth->finish();
    $dbh->disconnect();
}
else {
    $sth->finish();
    $dbh->disconnect();
    die "Profile \"$profile\" does not exist\n";
}

#-------------------------------------------------------------------------------
# Read the reportset for a list of available reports
#-------------------------------------------------------------------------------
my ( %name, %ifield, %type );
my $rrs = "$urchinpath/lib/reporting/profiletypes/$profiletype/$reportset";
my $crs = "$urchinpath/lib/custom/profiletypes/$profiletype/$reportset";

# Try the stock report set first and fall back to the custom one.
my $rs_fh;
open( $rs_fh, '<', $rrs )
  or open( $rs_fh, '<', $crs )
  or die "Failed to open reportset $!";

while ( my $entry = <$rs_fh> ) {
    next if $entry =~ /^#/;         # skip comment lines
    next if $entry =~ /^(\s)*$/;    # skip blank lines
    chomp $entry;                   # remove trailing newline characters

    # Separate the reportset parts
    my @fields = split( /\s+/, $entry );

    # Verify the entry is a report (R#### or S####). Store the name, ifield
    # and report type keyed by the 4-digit report number.
    next unless $fields[0] =~ /(?:R|S)(\d{4})/;
    my $repo = $1;

    $name{$repo}   = $lang_dictionary[ $fields[3] ];
    $ifield{$repo} = $fields[9];
    $type{$repo}   = $fields[13];
}
close($rs_fh);

#-------------------------------------------------------------------------------
# Exit if report is not available
#-------------------------------------------------------------------------------
if ( !$name{$report} ) {
    print "Report $report is not available. Available reports are:\n";
    foreach my $rep ( sort ( keys(%name) ) ) {
        print " $rep $name{$rep}\n";
    }
    exit;
}

#-------------------------------------------------------------------------------
# Determine the number of columns for this report
#-------------------------------------------------------------------------------
my @ifields;
my $columncount = 1;
if ( $ifield{$report} ) {
    @ifields     = split( /,/, $ifield{$report} );
    $columncount = scalar(@ifields);
}
else {
    $ifield{$report} = "-";
}

#-------------------------------------------------------------------------------
# Create query string, retrieve data, and print out values
#-------------------------------------------------------------------------------
my $cgi = "$urchinpath/bin/urchin.cgi";

# urchin.cgi is run as if it had been invoked by a web server for a GET
# request, so the request is passed through the CGI environment variables.
$ENV{REQUEST_METHOD} = "GET";
$ENV{QUERY_STRING} =
"rid=$rid&hl=$language&vid=$report&bd=$begin&ed=$end&ns=$max&ss=0&fd=&ft=2&sf=2&sb=1&dow=0&dt=3&dtc=2&dcomp=0&xd=1&x=1&user=admin";

# RT = report type (1=graph totals, 2=histogram, 3=stack, 4=top, 5=tree,
# 6=directory, 7=graph-limited, 8=dot, 9=click through, 10=defined funnel,
# 11=homepage overlay, 12=map overlay, 13=goal verification, 14=goal path,
# 20=kpi date report, 21=kpi top ten, 22=trend summary, 23=conversion summary,
# 24=Map Mini)
#
# Only the list-style report types (4, 5, 6) are limited to $max entries.
# TODO: this may no longer be meaningful since the report set format changed -
# the report type is now a digit.
my $report_type = defined $type{$report} ? $type{$report} : '';
my $is_limited =
  (      $report_type eq '4'
      || $report_type eq '5'
      || $report_type eq '6' );

my $count = 0;
open( my $cgi_fh, '-|', $cgi ) or die "Failed to open urchin.cgi: $!";
while ( my $row = <$cgi_fh> ) {
    chomp $row;

    # Skip Content-type and empty lines and print comment lines
    next if $row =~ /^Content-type:/;
    next if $row =~ /^$/;
    if ( $row =~ /^#\-+/ ) {
        printf
"################################################################################\n";
        next;
    }
    if ( $row =~ /^#/ ) {
        print "$row\n";
        next;
    }

    # Only print up to $max entries for TOP 10 type reports.
    # NOTE(review): as written this lets $max + 1 data rows through before
    # exiting; kept as-is in case the first row is a column header - confirm.
    if ($is_limited) {
        if ( $count > $max ) {
            exit;
        }
        $count++;
    }

    # Format and print the report data: the first tab-separated field is the
    # row label, the rest are values.
    my @cells = split( /\t/, $row );
    if ( $columncount == 1 ) {
        printf( "%-66s ", $cells[0] );
    }
    else {
        printf( "%-38s ", $cells[0] );
    }

    # CTYPES = column type used for formatting
    #   10=string           24=ratio
    #   11=date/yearmonth   25=pct
    #   12=date             26=time
    #   20=number           30=total pct
    #   21=revenue          40=revenue
    #   22=bytes            41=revenue/num
    #   23=time ratio       42=num/revenue
    #
    # NOTE(review): @ifields is indexed with the same 1-based position as the
    # value cell - confirm this matches the report set's ifield layout.
    for ( my $i = 1 ; $i < @cells ; $i++ ) {
        if ( $ifields[$i] ) {
            if ( $ifields[$i] == 22 ) {
                $cells[$i] = formatBytes( $cells[$i] );
            }
            elsif ( $ifields[$i] == 20 ) {
                $cells[$i] = formatNumber( $cells[$i] );
            }
            elsif ( $ifields[$i] == 26 ) {
                $cells[$i] = formatTime( $cells[$i] );
            }
        }
        printf( "%11s ", $cells[$i] );
    }
    printf("\n");
}
close($cgi_fh) or die "Failed to close urchin.cgi: $!";

exit;

#-------------------------------------------------------------------------------
# Subroutines
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# formatBytes(value)
# Changes a number into the proper byte format ("12.34 KB"). The value is
# divided by 1024 until it drops below 1024 or the largest known suffix (TB)
# is reached. Values containing no digits are returned unchanged.
#-------------------------------------------------------------------------------
sub formatBytes {
    my ($bytes) = @_;
    return $bytes unless defined $bytes && $bytes =~ /\d+/;

    my @suffixes = ( "B", "KB", "MB", "GB", "TB" );
    my $scaled   = $bytes;
    my $step     = 0;

    # Stop at the last suffix so very large values never index past the list.
    while ( $scaled >= 1024 && $step < $#suffixes ) {
        $step++;
        $scaled = $scaled / 1024;
    }
    return sprintf( "%4.2f %2s", $scaled, $suffixes[$step] );
}

#-------------------------------------------------------------------------------
# formatNumber(value)
# Formats a number with commas to separate the thousands. Also displays non-
# integer numbers with 2 decimal places. Values containing no digits are
# returned unchanged.
#-------------------------------------------------------------------------------
sub formatNumber {
    my ($x) = @_;
    return $x unless defined $x && $x =~ /\d+/;

    # Two-digit fractional part, e.g. 12.5 -> "50".
    my $z = ( ( 1000 * $x ) % 1000 ) / 10;
    $z = sprintf( "%02.0f", $z );

    if ( $x < 1000 ) {
        if ( ( $z == 0 ) && ( $x >= 1 ) ) {
            return $x;
        }
        return ( int($x) ) . ".$z";
    }

    # Peel off groups of three digits from the right. The comparison is ">="
    # so that exact multiples of 1000 are grouped too (1000 -> "1,000").
    my $y = '';
    while ( $x >= 1000 ) {
        my $rem = $x % 1000;
        $y = sprintf( ",%03u%s", $rem, $y );
        $x = int( $x / 1000 );
    }
    $y = $x . $y;

    return ( $z == 0 ) ? $y : "$y.$z";
}

#-------------------------------------------------------------------------------
# formatTime(seconds)
# Formats a number of seconds into a XX:XX:XX (hours:minutes:seconds) time
# format. Values containing no digits are returned unchanged.
#-------------------------------------------------------------------------------
sub formatTime {
    my ($seconds) = @_;
    return $seconds unless defined $seconds && $seconds =~ /\d+/;

    my $sec  = $seconds % 60;
    my $min  = int( $seconds / 60 ) % 60;
    my $hour = int( $seconds / 3600 );
    return sprintf( "%02d:%02d:%02d", $hour, $min, $sec );
}

#-------------------------------------------------------------------------------
# parseUrchinConf(file, \%config)
# Reads and parses urchin.conf; the config entries are stored in the hash
# passed by reference. Dies if the file name is missing or not readable.
#-------------------------------------------------------------------------------
sub parseUrchinConf {
    my ( $file_name, $config ) = @_;

    if ( !defined $file_name || !-r $file_name ) {
        my $shown = defined $file_name ? $file_name : '';
        die "Error: config file \"" . $shown . "\" doesn't exist\n";
    }
    parse_config_file( $file_name, $config );
    return;
}

#-------------------------------------------------------------------------------
# parse_config_file(file, \%config)
# Parses "Key: value" lines of the config file into the given hash. Anything
# from a '#' to the end of the line is dropped and blank lines are skipped.
#-------------------------------------------------------------------------------
sub parse_config_file {
    my ( $file, $config ) = @_;

    open( my $fh, '<', $file ) or die "Can't open $file: $!";
    while ( my $line = <$fh> ) {
        $line =~ s/#.*//;
        next if $line =~ /^(\s)*$/;
        chomp $line;
        if ( $line =~ /^\s*([^"]*):\s+(.*)$/ ) {
            $config->{$1} = "$2";
        }
    }
    close($fh);
    return;
}

#-------------------------------------------------------------------------------
# readDictionary(file, \@dictionary)
# Parses "<number> <text>" lines of a language dictionary into the given
# array, using the number as the array index. Anything from a '#' to the end
# of the line is dropped and blank lines are skipped.
#-------------------------------------------------------------------------------
sub readDictionary {
    my ( $file, $dictionary ) = @_;

    open( my $fh, '<', $file ) or die "Can't open $file: $!";
    while ( my $line = <$fh> ) {
        $line =~ s/#.*//;
        next if $line =~ /^(\s)*$/;
        chomp $line;
        if ( $line =~ /^\s*(\d+)\s+(.*)$/ ) {
            $dictionary->[$1] = "$2";
        }
    }
    close($fh);
    return;
}

#-------------------------------------------------------------------------------
# usage()
# Displays the usage for this script, including the configured default
# profile and report number.
#-------------------------------------------------------------------------------
sub usage {
    print <<"END_USAGE";
Usage: $0 [--begin YYYYMMDD] [--end YYYYMMDD] [--help] \\
          [--language LA] [--max N] [--profile PROFILE] [--report RRRR] \\
          [--urchinpath PATH]

Where:
  '--begin YYYYMMDD' specifies the starting date (default is one week ago),
  '--end YYYYMMDD' specifies the ending date (default is yesterday),
  '--help' displays this message
  '--language la-LA' specifies the language for the report. Available
      languages are: de-DE en-US es-ES fr-FR it-IT ja-JP ko-KR nl-NL pt-BR
      zh-CN zh-TW
  '--max N' is the maximum number of entries printed in the top 10 report
      types (default is 10).
  '--profile PROFILE' specifies the profile to retrieve data from. The
      default is $def_profile.
  '--report RRRR' is the 4-digit number for the report (default is $def_report)
      Available reports are defined in the report sets.
  '--urchinpath PATH' specifies the path to the Urchin distribution.
      Note that you can edit the script and set your path as a default

END_USAGE
    return;
}