/Main_Page

::You must have ninja focus to complete your mission::NinjaFocus::

Calamaris

Views:


Calamaris is a log analyser for squid. It makes nice reports of the kind that management types like. Actually, they're very useful in their own right.

This script will run calamaris over the squid access logs every night and build up a collection of Daily, Weekly and Monthly reports going back over the last two years.

It relies on a little perl script to split the access log up into one log file per day. Most servers rotate their logs in the early hours of the morning. I wanted the reports to represent one whole day from 00:00 hrs to 23:59 hrs.

access-log-split.pl:

#!/usr/bin/env perl

# Take the squid log file specified as the first argument when calling this script,
# then split all entries into separate files, one for each day.

use strict;
use warnings;
use File::Basename;
use POSIX;

# Format an epoch timestamp as a calendar date (YYYY-MM-DD) in the local
# time zone. A missing or false argument (undef, 0, '') falls back to the
# current time.
sub isoDate {
    my ($epoch) = @_;
    $epoch = time unless $epoch;
    return strftime("%Y-%m-%d", localtime($epoch));
}

# Prefix of every per-day output file. Files are named "<prefix>.<YYYY-MM-DD>"
# and are opened relative to the current working directory.
my $logtype = "access";

# Date of the output file that is currently open, and its handle.
my $previousDate = '';
my $logfile;

# Read every line from the files named on the command line (or STDIN) and
# append it to the file for the day its timestamp falls on.
while (my $line = <>) {
    # The leading run of digits before the first "." is taken as the epoch
    # timestamp. Lines without one keep 0, which isoDate() maps to today.
    my $timeStamp = 0;
    if ($line =~ /^(\d+)\./) {
        $timeStamp = $1;
    }
    my $currentDate = isoDate($timeStamp);

    # Only switch files when the day actually changes.
    if ($currentDate ne $previousDate || !$logfile) {
        if ($logfile) {
            close($logfile)
                or die "Could not close $logtype.$previousDate: $!";
        }
        # Append, so that a day spread over several input files accumulates
        # in a single output file.
        open $logfile, ">>", "$logtype.$currentDate"
            or die "Could not open $logtype.$currentDate: $!";
        # Remember the open day; without this the file would be closed and
        # reopened for every single input line.
        $previousDate = $currentDate;
    }
    print {$logfile} $line
        or die "Could not write to $logtype.$currentDate: $!";
}

# Buffered write errors only surface at close, so check the final one too.
if ($logfile) {
    close($logfile)
        or die "Could not close $logtype.$previousDate: $!";
}


calamaris.sh:

#!/bin/bash
#
# A script to generate calamaris log reports from the squid access logs
#
# This script depends on the squid log rotation script, 
# /etc/cron.daily/squid.daily.job, and must be run after it.
#
# The script creates daily, weekly and monthly reports. It also keeps
# the last copy of any given day, week or month to allow comparisons.
#
# The script needs to run each day, it won't catch up if any days are
# missed. If that happens, weekly and monthly reports will still be
# created but won't be accurate.
#
# A simple perl script, "access-log-split.pl", is used to divide log
# data into one file per day, which is then boiled down by calamaris
# into daily cache files. These cache files are kept for one month.

################
# Configuration

# Location of the calamaris executable and its configuration file.
CALAMARIS=/usr/local/calamaris/calamaris
CONF=/usr/local/calamaris/calamaris.conf

# Logs are picked up from the log archive directory. See /etc/cron.daily/squid.daily.job
LOG_DIR=/var/log/squid
ACCESS_LOG="${LOG_DIR}/access.log"

# Host name passed to calamaris via --hostname.
CACHE_HOSTNAME=squid-cache.example.com

# Current reports, and the older copies kept for comparison.
CURRENT_REPORTS_DIR=/var/lib/calamaris/reports
PREVIOUS_REPORTS_DIR="${CURRENT_REPORTS_DIR}/previous"

# Working directory for the per-day log files and calamaris cache files.
CACHE=/var/lib/calamaris/cache

# External tools used by the script.
BZCAT=$(which bzcat)
CAT=$(which cat)
LOGSPLIT=/usr/local/bin/access-log-split.pl
RSYNC=$(which rsync)

# Destination the finished reports are copied to with rsync over ssh.
REMOTE_HOST=www.example.com
REMOTE_USER=calamari
REMOTE_PATH=/var/www/html/squid-reports
# The identity file lives next to known_hosts under /var/lib/calamaris/.ssh.
REMOTE_SHELL="ssh -q -i /var/lib/calamaris/.ssh/id_rsa -o UserKnownHostsFile=/var/lib/calamaris/.ssh/known_hosts"

# Priority adjustment handed to "nice -n" for the heavy commands.
# NOTE(review): nice(1) usually clamps adjustments above 19 - confirm that 39
# is intended.
NICENESS=39

# Sub-directory names used below both report directories.
DAILY_REPORT_LABEL=daily
WEEKLY_REPORT_LABEL=weekly
MONTHLY_REPORT_LABEL=monthly


########################
# Options for calamaris

# Options shared by every calamaris invocation. Inside the double quotes a
# trailing backslash joins the next line, and the variables are expanded
# unquoted where they are used so the shell splits them back into words.
DEFAULT_ARGUMENTS="
--errorcode-distribution-report \
--input-format squid-extended \
--hostname ${CACHE_HOSTNAME} \
--peak-report new \
--response-time-report \
--size-distribution-report 10 \
--status-report \
--type-report-ignore-case"

# When generating cache files, don't limit the length of reports
CACHE_ARGUMENTS="${DEFAULT_ARGUMENTS} \
--benchmark 50000 \
--domain-report -1 \
--performance-report 60 \
--requester-report -1 \
--type-report -1"

# Limit the length of reports for general consumption - to limit 
# privacy issues
OUTPUT_ARGUMENTS="${DEFAULT_ARGUMENTS} \
--benchmark 10000 \
--domain-report 50 \
--requester-report 50 \
--type-report 50 \
--output-format html,graph \
--image-type gif"


############
# functions

# Print the date the given number of days in the past, formatted with the
# provided date(1) format string (given without the leading "+").
# usage: pastDate <minus no. days> <format>
# Returns 1, printing nothing on stdout, if an argument is missing or the
# platform is neither Darwin nor Linux.
pastDate()
{
    # "return" rather than "exit": a direct call must not end the whole script.
    if [ -z "$1" ] || [ -z "$2" ]
    then
        echo "usage: pastDate <minus no. days> <format>" >&2
        return 1
    fi

    # The two date(1) implementations spell relative dates differently.
    case "$(uname)" in
        Darwin)
            date -v -"$1"d +"$2"
            ;;
        Linux)
            date --date="$1 day ago" +"$2"
            ;;
        *)
            echo "pastDate: unsupported platform '$(uname)'" >&2
            return 1
            ;;
    esac
}

##############################
# Directory Checks / Creation

# Check required directories are writable - try to create the ones flagged
# "create" if they are missing - or exit.

# Exit with status 1 unless <path> is a writable directory. When a third
# argument is given and nothing exists at <path>, try to create it first.
# usage: requireWritableDir <description> <path> [create]
requireWritableDir()
{
    local description=$1
    local dir=$2
    local create=${3:-}

    if [ -n "${create}" ] && [ ! -e "${dir}" ]
    then
        # No -p: the parent directory is expected to exist already.
        mkdir "${dir}"
    fi

    if [ ! -d "${dir}" ] || [ ! -w "${dir}" ]
    then
        echo "The ${description} directory, "
        echo "'${dir}',"
        echo "does not exist or is not writable"
        exit 1
    fi
}

requireWritableDir "cache" "${CACHE}" create

# The top-level reports directory is only checked, never created.
requireWritableDir "reports" "${CURRENT_REPORTS_DIR}"

DAILY_DIR="${CURRENT_REPORTS_DIR}/${DAILY_REPORT_LABEL}"
requireWritableDir "daily reports" "${DAILY_DIR}" create

WEEKLY_DIR="${CURRENT_REPORTS_DIR}/${WEEKLY_REPORT_LABEL}"
requireWritableDir "weekly reports" "${WEEKLY_DIR}" create

MONTHLY_DIR="${CURRENT_REPORTS_DIR}/${MONTHLY_REPORT_LABEL}"
requireWritableDir "monthly reports" "${MONTHLY_DIR}" create

# The previous-reports directory is only checked, never created.
requireWritableDir "previous reports" "${PREVIOUS_REPORTS_DIR}"

PREV_DAILY_DIR="${PREVIOUS_REPORTS_DIR}/${DAILY_REPORT_LABEL}"
requireWritableDir "previous daily reports" "${PREV_DAILY_DIR}" create

PREV_WEEKLY_DIR="${PREVIOUS_REPORTS_DIR}/${WEEKLY_REPORT_LABEL}"
requireWritableDir "previous weekly reports" "${PREV_WEEKLY_DIR}" create

PREV_MONTHLY_DIR="${PREVIOUS_REPORTS_DIR}/${MONTHLY_REPORT_LABEL}"
requireWritableDir "previous monthly reports" "${PREV_MONTHLY_DIR}" create


########
# Dates

# Every value below describes yesterday, the day whose logs and activity are
# being processed.
DAY_OF_WEEK="$(pastDate 1 %w)"
WEEK="$(pastDate 1 %U)"
MONTH="$(pastDate 1 %m)"
YEAR="$(pastDate 1 %Y)"
DATE="$(pastDate 1 %F)"

# The same calendar day one and two years earlier: the year is recomputed and
# the last six characters of DATE are appended to it.
PREV_DATE="$(( YEAR - 1 ))${DATE: -6}"
PREV_PREV_DATE="$(( YEAR - 2 ))${DATE: -6}"

# Set to "true" only when today is the 1st of the month, which means that
# yesterday was the end of the month; empty otherwise.
MONTH_ENDED=
if [ "$(date +%d)" = 01 ]
then
    MONTH_ENDED=true
fi


#############################
# Process and Cache Log Data 

# The log splitter writes its per-day "access.<date>" files into the current
# directory, so the script must really be inside the cache directory.
cd "${CACHE}" || {
    echo "Could not change to the cache directory '${CACHE}'"
    exit 1
}

# The rotated, compressed access log for yesterday's day of the week.
ROTATED_LOG="${ACCESS_LOG}.${DAY_OF_WEEK}.bz2"

if [ -f "${ROTATED_LOG}" ]
then
    $BZCAT "${ROTATED_LOG}" | $LOGSPLIT

    # Only run calamaris if the split produced entries for yesterday.
    if [ -f "${CACHE}/access.${DATE}" ]
    then
        echo "Generating daily cache file"
        nice -n ${NICENESS} "${CALAMARIS}" \
            ${CACHE_ARGUMENTS} \
            --cache-output-file "${CACHE}/cache.${DATE}" \
            < "${CACHE}/access.${DATE}" > /dev/null
        # The day's raw log is no longer needed once it has been cached.
        rm "${CACHE}/access.${DATE}"
        echo
    else
        echo "WARNING: No log entries found for ${DATE}"
    fi
else
    echo "WARNING: No log file found for ${DATE} (${ROTATED_LOG})"
fi


###########################
# Create Daily HTML Report

# Retire the report made for this calendar day one year ago: drop the copy
# from two years ago, then move last year's report into the previous tree.
if [ -d "${DAILY_DIR}/${PREV_DATE}" ]; then
    echo "Moving previous Daily HTML report"
    if [ -d "${PREV_DAILY_DIR}/${PREV_PREV_DATE}" ]; then
        rm -rf "${PREV_DAILY_DIR}/${PREV_PREV_DATE}"
    fi
    mv "${DAILY_DIR}/${PREV_DATE}" "${PREV_DAILY_DIR}/${PREV_DATE}"
fi

echo "Creating Daily HTML Report"

if [ ! -f "${CACHE}/cache.${DATE}" ]; then
    echo "WARNING: No cache found for ${DATE}"
else
    mkdir -p "${DAILY_DIR}/${DATE}"

    # Build the report from the cache file of the most recent day only.
    nice -n ${NICENESS} "${CALAMARIS}" \
        ${OUTPUT_ARGUMENTS} \
        --no-input \
        --performance-report 60 \
        --cache-input-file "${CACHE}/cache.${DATE}" \
        --output-path "${DAILY_DIR}/${DATE}"
    echo
fi


#######################
# Create Weekly Report

# Runs only when yesterday was day 6 of the week (%w), i.e. a week that
# started on day 0 has just been completed.
if [ "${DAY_OF_WEEK}" -eq 6 ]
then

    # Keep one older copy of the report with the same week number.
    if [ -d "${WEEKLY_DIR}/${WEEK}" ]
    then
        echo "Moving previous Weekly HTML report"
        if [ -d "${PREV_WEEKLY_DIR}/${WEEK}" ]
        then
            rm -rf "${PREV_WEEKLY_DIR}/${WEEK}"
        fi
        mv "${WEEKLY_DIR}/${WEEK}" "${PREV_WEEKLY_DIR}/${WEEK}"
    fi

    echo "Creating Weekly HTML Report"

    # Collect the daily cache files of the last seven days, oldest first,
    # as a colon separated list.
    CACHE_FILES=
    DATE_ADJUSTMENT=$(( DAY_OF_WEEK + 1 ))
    while [ "${DATE_ADJUSTMENT}" -ge 1 ]
    do
        CACHE_DATE=$(pastDate "${DATE_ADJUSTMENT}" %F)

        if [ -f "${CACHE}/cache.${CACHE_DATE}" ]
        then
            # Prepend the separator only when the list is not empty.
            CACHE_FILES="${CACHE_FILES:+${CACHE_FILES}:}${CACHE}/cache.${CACHE_DATE}"
        else
            echo "WARNING: No cache found for ${CACHE_DATE}"
        fi

        # Always step to the next day, also after a missing cache file;
        # otherwise the loop would retry the same day forever.
        DATE_ADJUSTMENT=$(( DATE_ADJUSTMENT - 1 ))
    done

    if [ -n "${CACHE_FILES}" ]
    then
        mkdir -p "${WEEKLY_DIR}/${WEEK}"

        nice -n ${NICENESS} "${CALAMARIS}" \
            ${OUTPUT_ARGUMENTS} \
            --no-input \
            --cache-input-file "${CACHE_FILES}" \
            --output-path "${WEEKLY_DIR}/${WEEK}"
        echo
    fi

fi




########################
# Create Monthly Report

# Runs only on the 1st of a month, when yesterday completed a month.
if [ -n "${MONTH_ENDED}" ]
then

    # Keep one older copy of the report with the same month number.
    if [ -d "${MONTHLY_DIR}/${MONTH}" ]
    then
        echo "Moving previous Monthly HTML report"
        if [ -d "${PREV_MONTHLY_DIR}/${MONTH}" ]
        then
            rm -rf "${PREV_MONTHLY_DIR}/${MONTH}"
        fi
        mv "${MONTHLY_DIR}/${MONTH}" "${PREV_MONTHLY_DIR}/${MONTH}"
    fi

    echo "Creating Monthly HTML Report"

    # Collect every daily cache file of the finished month as a colon
    # separated list. The glob is expanded by the shell itself instead of
    # parsing the output of ls.
    CACHE_FILES=
    for CACHE_FILE in "${CACHE}"/cache."${YEAR}-${MONTH}"-*
    do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -f "${CACHE_FILE}" ] || continue

        CACHE_FILES="${CACHE_FILES:+${CACHE_FILES}:}${CACHE_FILE}"
    done

    if [ -n "${CACHE_FILES}" ]
    then

        mkdir -p "${MONTHLY_DIR}/${MONTH}"

        nice -n ${NICENESS} "${CALAMARIS}" \
            ${OUTPUT_ARGUMENTS} \
            --no-input \
            --cache-input-file "${CACHE_FILES}" \
            --output-path "${MONTHLY_DIR}/${MONTH}"
        echo

    fi

fi



#########################
# Delete old cache files

# Runs only on the 1st of a month: removes the daily cache files of the month
# before the one that has just ended.
if [ -n "${MONTH_ENDED}" ]
then

    echo "Removing old cache files"

    # Strip a leading zero so that "08" and "09" are not read as octal.
    if [ "${MONTH#0}" -gt 1 ]
    then
        RM_MONTH=$(printf "%02d" $(( ${MONTH#0} - 1 )))
        RM_YEAR=$YEAR
    else
        # The month before January is December of the year before.
        RM_MONTH=12
        RM_YEAR=$(( YEAR - 1 ))
    fi

    # The glob already yields full paths, so they are passed to rm as they
    # are; prefixing them with the cache directory again would never match.
    for CACHE_FILE in "${CACHE}"/cache."${RM_YEAR}-${RM_MONTH}"-*
    do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -f "${CACHE_FILE}" ] || continue
        rm -f -- "${CACHE_FILE}"
    done

fi



###########################
# Copy files to web server 

# Mirror the current reports tree to the web server. Any trailing slash is
# stripped from both paths and exactly one is added back, so that rsync copies
# the contents of the source directory into the destination directory.
echo "Copying files to ${REMOTE_HOST}"
nice -n ${NICENESS} $RSYNC \
    --chmod=D=rx --chmod=F=r --chmod=u+w \
    --delete-after \
    --recursive \
    --rsh="${REMOTE_SHELL}" \
    --times \
    "${CURRENT_REPORTS_DIR%/}/" \
    "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_PATH%/}/"

Main Menu

Personal tools

Toolbox