Calamaris
Views:
Calamaris is a log analyser for squid. It makes nice reports that management types like. Actually, they're very useful in their own right.
This script will run calamaris over the squid access logs every night and build up a collection of Daily, Weekly and Monthly reports going back over the last two years.
It relies on a little perl script to split the access log up into one log file per day. Most servers rotate their logs in the early hours of the morning, but I wanted the reports to represent one whole day from 00:00 hrs to 23:59 hrs.
access-log-split.pl:
#!/usr/bin/env perl
# Take the squid log file specified as the first argument when calling this script
# (or standard input when no argument is given), then split all entries into
# separate files for each day.
use strict;
use warnings;
use File::Basename;
use POSIX;
# Format an epoch timestamp as a calendar date (YYYY-MM-DD) in the local
# time zone. A missing or false timestamp (undef, 0, '') means "now".
sub isoDate {
    my ($epoch) = @_;
    $epoch = time unless $epoch;
    my @fields = localtime($epoch);
    return strftime("%Y-%m-%d", @fields);
}
# Name prefix of the per-day output files: "<prefix>.<YYYY-MM-DD>".
my $logtype = "access";
# Date of the output file that is currently open; '' until the first line.
my $previousDate = '';
# Handle of the per-day file currently being appended to.
my $logfile;

# Read every input line (files named on the command line, or STDIN) and
# append it to the file for the day given by its leading timestamp. Output
# files are created in the current working directory.
while (my $line = <>) {
    # The leading digits before the first "." are taken as epoch seconds.
    # Lines that do not start that way keep a timestamp of 0, which isoDate
    # turns into the current date.
    my $timeStamp = 0;
    if ($line =~ /^(\d+)\./) {
        $timeStamp = $1;
    }
    my $currentDate = isoDate($timeStamp);

    # Switch output files only when the day changes.
    if ($currentDate ne $previousDate || !$logfile) {
        if ($logfile) {
            close($logfile)
                or die "Could not close $logtype.$previousDate: $!";
        }
        open $logfile, ">>", "$logtype.$currentDate"
            or die "Could not open $logtype.$currentDate: $!";
        # Remember which day is open so later lines for the same day reuse
        # the handle instead of reopening the file for every line.
        $previousDate = $currentDate;
    }
    print {$logfile} $line
        or die "Could not write to $logtype.$currentDate: $!";
}

# Buffered write errors only surface when the handle is closed.
if ($logfile) {
    close($logfile)
        or die "Could not close $logtype.$previousDate: $!";
}
calamaris.sh:
#!/bin/bash
#
# A script to generate calamaris log reports from the squid access logs
#
# This script depends on the squid log rotation script,
# /etc/cron.daily/squid.daily.job, and must be run after it.
#
# The script creates daily, weekly and monthly reports. It also keeps
# the last copy of any given day, week or month to allow comparisons.
#
# The script needs to run each day, it won't catch up if any days are
# missed. If that happens, weekly and monthly reports will still be
# created but won't be accurate.
#
# A simple perl script, "access-log-split.pl", is used to divide log
# data into one file per day, which is then boiled down by calamaris
# into daily cache files. These cache files are kept for one month.
################
# Configuration
# Locations of the calamaris executable and its configuration file.
CALAMARIS=/usr/local/calamaris/calamaris
CONF=/usr/local/calamaris/calamaris.conf
# Logs are picked up from the log archive directory. See /etc/cron.daily/squid.daily.job
LOG_DIR=/var/log/squid
ACCESS_LOG="${LOG_DIR}/access.log"
# Host name handed to calamaris via --hostname.
CACHE_HOSTNAME=squid-cache.example.com
# Where the HTML reports and the per-day calamaris cache files are kept.
CURRENT_REPORTS_DIR=/var/lib/calamaris/reports
PREVIOUS_REPORTS_DIR="${CURRENT_REPORTS_DIR}/previous"
CACHE=/var/lib/calamaris/cache
# External tools; resolved once from PATH.
BZCAT=$(command -v bzcat)
CAT=$(command -v cat)
LOGSPLIT=/usr/local/bin/access-log-split.pl
RSYNC=$(command -v rsync)
# Destination the finished reports are copied to.
REMOTE_HOST=www.example.com
REMOTE_USER=calamari
REMOTE_PATH=/var/www/html/squid-reports
# The identity file lives next to known_hosts under /var/lib/calamaris/.ssh.
REMOTE_SHELL="ssh -q -i /var/lib/calamaris/.ssh/id_rsa -o UserKnownHostsFile=/var/lib/calamaris/.ssh/known_hosts"
# Niceness increment passed to "nice -n" for the calamaris and rsync runs.
NICENESS=39
# Sub-directory names used for each report period.
DAILY_REPORT_LABEL=daily
WEEKLY_REPORT_LABEL=weekly
MONTHLY_REPORT_LABEL=monthly
########################
# Options for calamaris
# These strings are expanded unquoted on the calamaris command lines, so the
# shell splits them into separate arguments. The backslash-newline pairs
# inside the double quotes are line continuations and are not part of the value.
# Options common to cache generation and report output.
DEFAULT_ARGUMENTS="
--errorcode-distribution-report \
--input-format squid-extended \
--hostname ${CACHE_HOSTNAME} \
--peak-report new \
--response-time-report \
--size-distribution-report 10 \
--status-report \
--type-report-ignore-case"
# When generating cache files, don't limit the length of reports
# (-1 is passed as the length for the domain, requester and type reports)
CACHE_ARGUMENTS="${DEFAULT_ARGUMENTS} \
--benchmark 50000 \
--domain-report -1 \
--performance-report 60 \
--requester-report -1 \
--type-report -1"
# Limit the length of reports for general consumption - to limit
# privacy issues. Output is HTML with GIF graphs.
OUTPUT_ARGUMENTS="${DEFAULT_ARGUMENTS} \
--benchmark 10000 \
--domain-report 50 \
--requester-report 50 \
--type-report 50 \
--output-format html,graph \
--image-type gif"
############
# functions
# Print the date a given number of days in the past, formatted with the
# provided date(1) format string. Uses the BSD "date -v" form on Darwin and
# the GNU "date --date" form on Linux.
# usage: pastDate <minus no. days> <format>
# returns: 0 on success; non-zero, with a message on stderr, when an argument
#          is missing or the operating system is neither Darwin nor Linux.
pastDate()
{
    if [ -z "$1" ] || [ -z "$2" ]
    then
        echo "usage: pastDate <minus no. days> <format>" >&2
        # return, not exit: a direct call must not terminate the whole script
        return 1
    fi
    case "$(uname)" in
        Darwin)
            date -v "-${1}d" +"$2"
            ;;
        Linux)
            date --date="$1 day ago" +"$2"
            ;;
        *)
            # Fail loudly instead of silently printing nothing
            echo "pastDate: unsupported operating system '$(uname)'" >&2
            return 1
            ;;
    esac
}
##############################
# Directory Checks / Creation
# Check required directories are writable. The cache directory and the
# per-period report directories are created when missing; the two top-level
# report directories must already exist. Any failure ends the script.

# Verify that a directory exists and is writable, optionally creating it
# first. On failure a message naming the directory is written to stderr and
# the script exits with status 1.
# usage: requireWritableDir <description> <path> [create]
requireWritableDir()
{
    if [ "$3" = "create" ] && [ ! -e "$2" ]
    then
        # Not "mkdir -p": the parent directory is expected to exist already
        mkdir "$2"
    fi
    if [ ! -d "$2" ] || [ ! -w "$2" ]
    then
        echo "The $1 directory, " >&2
        echo "'$2'," >&2
        echo "does not exist or is not writable" >&2
        exit 1
    fi
}

requireWritableDir "cache" "${CACHE}" create
requireWritableDir "reports" "${CURRENT_REPORTS_DIR}"

# Current reports, one sub-directory per period
DAILY_DIR="${CURRENT_REPORTS_DIR}/${DAILY_REPORT_LABEL}"
requireWritableDir "daily reports" "${DAILY_DIR}" create
WEEKLY_DIR="${CURRENT_REPORTS_DIR}/${WEEKLY_REPORT_LABEL}"
requireWritableDir "weekly reports" "${WEEKLY_DIR}" create
MONTHLY_DIR="${CURRENT_REPORTS_DIR}/${MONTHLY_REPORT_LABEL}"
requireWritableDir "monthly reports" "${MONTHLY_DIR}" create

requireWritableDir "previous reports" "${PREVIOUS_REPORTS_DIR}"

# Previous reports, laid out the same way as the current ones
PREV_DAILY_DIR="${PREVIOUS_REPORTS_DIR}/${DAILY_REPORT_LABEL}"
requireWritableDir "previous daily reports" "${PREV_DAILY_DIR}" create
PREV_WEEKLY_DIR="${PREVIOUS_REPORTS_DIR}/${WEEKLY_REPORT_LABEL}"
requireWritableDir "previous weekly reports" "${PREV_WEEKLY_DIR}" create
PREV_MONTHLY_DIR="${PREVIOUS_REPORTS_DIR}/${MONTHLY_REPORT_LABEL}"
requireWritableDir "previous monthly reports" "${PREV_MONTHLY_DIR}" create
########
# Dates
# All of these describe yesterday, i.e. the day whose logs and activity are
# being reported on.
DATE=$(pastDate 1 %F)
YEAR=$(pastDate 1 %Y)
MONTH=$(pastDate 1 %m)
WEEK=$(pastDate 1 %U)
DAY_OF_WEEK=$(pastDate 1 %w)
# The same month and day one and two years earlier: the year is replaced and
# the trailing "-MM-DD" of DATE is kept.
PREV_DATE="$((YEAR - 1))${DATE: -6}"
PREV_PREV_DATE="$((YEAR - 2))${DATE: -6}"
# if today is the 1st of the month, then yesterday was the end of the month;
# MONTH_ENDED is "true" in that case and empty otherwise
if [ "$(date +%d)" = 01 ]
then
    MONTH_ENDED=true
else
    MONTH_ENDED=
fi
#############################
# Process and Cache Log Data
# Split the rotated, compressed access log into per-day files and boil
# yesterday's file down into a calamaris cache file.
# The splitter writes its access.<date> files into the current directory, so
# a failed cd must stop the script rather than scatter files elsewhere.
cd "${CACHE}" || exit 1
if [ -f "${ACCESS_LOG}.${DAY_OF_WEEK}.bz2" ]
then
    $BZCAT "${ACCESS_LOG}.${DAY_OF_WEEK}.bz2" | $LOGSPLIT
    if [ -f "${CACHE}/access.${DATE}" ]
    then
        echo "Generating daily cache file"
        # The report itself is discarded; only the cache file is wanted here
        nice -n ${NICENESS} "${CALAMARIS}" \
            ${CACHE_ARGUMENTS} \
            --cache-output-file "${CACHE}/cache.${DATE}" \
            < "${CACHE}/access.${DATE}" > /dev/null
        rm "${CACHE}/access.${DATE}"
        echo
    else
        echo "WARNING: No log entries found for ${DATE}"
    fi
else
    echo "WARNING: No log file found for ${DATE} (${ACCESS_LOG}.${DAY_OF_WEEK}.bz2)"
fi
###########################
# Create Daily HTML Report
# If a report for the same day last year is still among the current reports,
# move it to the previous reports, dropping the one from two years ago first.
if [ -d "${DAILY_DIR}/${PREV_DATE}" ]
then
    echo "Moving previous Daily HTML report"
    if [ -d "${PREV_DAILY_DIR}/${PREV_PREV_DATE}" ]
    then
        rm -rf "${PREV_DAILY_DIR}/${PREV_PREV_DATE}"
    fi
    mv "${DAILY_DIR}/${PREV_DATE}" "${PREV_DAILY_DIR}/${PREV_DATE}"
fi
echo "Creating Daily HTML Report"
if [ ! -f "${CACHE}/cache.${DATE}" ]
then
    echo "WARNING: No cache found for ${DATE}"
else
    mkdir -p "${DAILY_DIR}/${DATE}"
    # Build the report purely from yesterday's cache file
    nice -n ${NICENESS} "${CALAMARIS}" \
        ${OUTPUT_ARGUMENTS} \
        --no-input \
        --performance-report 60 \
        --cache-input-file "${CACHE}/cache.${DATE}" \
        --output-path "${DAILY_DIR}/${DATE}"
    echo
fi
#######################
# Create Weekly Report
# Runs when yesterday was day 6 of the week (date's %w, where 0 is Sunday),
# i.e. the last day of a %U week. The report is keyed by the week number only.
if [ "${DAY_OF_WEEK}" -eq 6 ]
then
    if [ -d "${WEEKLY_DIR}/${WEEK}" ]
    then
        echo "Moving previous Weekly HTML report"
        if [ -d "${PREV_WEEKLY_DIR}/${WEEK}" ]
        then
            rm -rf "${PREV_WEEKLY_DIR}/${WEEK}"
        fi
        mv "${WEEKLY_DIR}/${WEEK}" "${PREV_WEEKLY_DIR}/${WEEK}"
    fi
    echo "Creating Weekly HTML Report"
    # Collect the cache files for the seven days ending yesterday, oldest
    # first, as a colon-separated list.
    CACHE_FILES=
    DATE_ADJUSTMENT=$(( DAY_OF_WEEK + 1 ))
    while [ "${DATE_ADJUSTMENT}" -ge 1 ]
    do
        CACHE_DATE=$(pastDate "${DATE_ADJUSTMENT}" %F)
        if [ -f "${CACHE}/cache.${CACHE_DATE}" ]
        then
            CACHE_FILES="${CACHE_FILES:+${CACHE_FILES}:}${CACHE}/cache.${CACHE_DATE}"
        else
            echo "WARNING: No cache found for ${CACHE_DATE}"
        fi
        # Step to the next day whether or not a cache file was found; a
        # missing day must be skipped, not retried forever.
        DATE_ADJUSTMENT=$(( DATE_ADJUSTMENT - 1 ))
    done
    if [ -n "${CACHE_FILES}" ]
    then
        mkdir -p "${WEEKLY_DIR}/${WEEK}"
        nice -n ${NICENESS} "${CALAMARIS}" \
            ${OUTPUT_ARGUMENTS} \
            --no-input \
            --cache-input-file "${CACHE_FILES}" \
            --output-path "${WEEKLY_DIR}/${WEEK}"
        echo
    fi
fi
########################
# Create Monthly Report
# Runs on the 1st of a month and reports on the month that ended yesterday.
# The report is keyed by the month number only.
if [ -n "${MONTH_ENDED}" ]
then
    if [ -d "${MONTHLY_DIR}/${MONTH}" ]
    then
        echo "Moving previous Monthly HTML report"
        if [ -d "${PREV_MONTHLY_DIR}/${MONTH}" ]
        then
            rm -rf "${PREV_MONTHLY_DIR}/${MONTH}"
        fi
        mv "${MONTHLY_DIR}/${MONTH}" "${PREV_MONTHLY_DIR}/${MONTH}"
    fi
    echo "Creating Monthly HTML Report"
    # Collect every daily cache file of that month as a colon-separated list.
    CACHE_FILES=
    for CACHE_FILE in "${CACHE}"/cache."${YEAR}-${MONTH}"-*
    do
        # An unmatched glob is left as the literal pattern; skip it
        [ -f "${CACHE_FILE}" ] || continue
        CACHE_FILES="${CACHE_FILES:+${CACHE_FILES}:}${CACHE_FILE}"
    done
    if [ -n "${CACHE_FILES}" ]
    then
        mkdir -p "${MONTHLY_DIR}/${MONTH}"
        nice -n ${NICENESS} "${CALAMARIS}" \
            ${OUTPUT_ARGUMENTS} \
            --no-input \
            --cache-input-file "${CACHE_FILES}" \
            --output-path "${MONTHLY_DIR}/${MONTH}"
        echo
    else
        echo "WARNING: No cache files found for ${YEAR}-${MONTH}"
    fi
fi
#########################
# Delete old cache files
# On the 1st of a month, remove the daily cache files of the month before the
# one that has just ended.
if [ -n "${MONTH_ENDED}" ]
then
    echo "Removing old cache files"
    # MONTH is zero-padded; strip the padding before doing arithmetic so that
    # "08" and "09" are not read as invalid octal numbers.
    if [ "${MONTH##0}" -gt 1 ]
    then
        RM_MONTH=$(printf "%02d" $(( ${MONTH##0} - 1 )))
        RM_YEAR=$YEAR
    else
        # The month that ended was January, so remove December of last year
        RM_MONTH=12
        RM_YEAR=$(( YEAR - 1 ))
    fi
    for CACHE_FILE in "${CACHE}"/cache."${RM_YEAR}-${RM_MONTH}"-*
    do
        # An unmatched glob is left as the literal pattern; skip it
        [ -f "${CACHE_FILE}" ] || continue
        # The glob already yields the full path; do not prefix CACHE again
        rm "${CACHE_FILE}"
    done
fi
###########################
# Copy files to web server
# Mirror the current reports directory to the remote path over the configured
# remote shell. One trailing slash is stripped from each path before a single
# one is appended.
echo "Copying files to ${REMOTE_HOST}"
nice -n ${NICENESS} $RSYNC \
    --recursive \
    --times \
    --delete-after \
    --chmod=D=rx --chmod=F=r --chmod=u+w \
    --rsh="${REMOTE_SHELL}" \
    "${CURRENT_REPORTS_DIR%/}/" \
    "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_PATH%/}/"
