#!/usr/bin/perl
##########################################################################
# $Id: amavis,v 1.48 2007/05/16 04:27:17 mrc Exp $
##########################################################################

##########################################################################
# Amavis-logwatch: written and maintained by:
#
#    Mike "MrC" Cappella <lists-logwatch@cappella.us>
#
# Please send all comments, suggestions, bug reports to the logwatch
# mailing list (logwatch@logwatch.org), or to the email address above.
# I will respond as quickly as possible. [MrC]
#
# All work since Dec 12, 2006 (logwatch CVS revision 1.28)
# Copyright (C) 2006,2007  Mike Cappella
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

##########################################################################
# The original amavis logwatch filter was written by
# Jim O'Halloran <jim@kendle.com.au>, and has had many contributors over
# the years.
#
# CVS log removed: see Changes file for amavis-logwatch at
#    http://www.mikecappella.com/logwatch
# or included with the standalone amavis-logwatch distribution
##########################################################################

use warnings;
no warnings "uninitialized";
use strict;

use Getopt::Long;
use File::Basename;

# Program identification: the release string, the prefix used to build the
# default config file name, and the name this script was invoked as
# (fileparse's scalar result, i.e. without the directory part).
my ($Version, $progname_prefix) = ('1.48.27', 'amavis');
my $progname = fileparse($0);

# Report layout: two field widths and the two separator characters.
my $fw1  = 22;
my $fw2  = 10;
my $sep1 = '=';
my $sep2 = '-';


# Fallback values for the list-valued report options.  An option can be
# disabled by undef'ing its value (via the config file or --nodetail);
# when a later command line option re-enables it, the value kept here
# is what it is reset to.
my %Defaults = (
   timings_percentiles => "0 10 25 50 75 90 100",
   score_percentiles   => "0 50 90 95 98 100",
   score_frequencies   => "-10 -5 0 5 10 20 30",
   sarules             => "20 20",
);

# Command line options, each listed with its config file variable:
#
#   detail               report level detail
#   max_report_width     maximum line width for report output     : amavis_max_report_width
#   syslog_name          service name in syslog                   : amavis_syslog_name
#   timings              show top N% of the timings report        : amavis_timings
#   bayes                show hit Bayesian buckets                : amavis_bayes
#   startinfo            show amavis startup info                 : amavis_startinfo
#   timings_percentiles  percentiles shown in timing report       : amavis_timing_percentiles
#   score_percentiles    percentiles shown in spam scores report  : amavis_score_percentiles
#   score_frequencies    buckets shown in spam scores report      : amavis_score_frequencies
#   sarules              show SpamAssassin rules hit              : amavis_sarules
my %Opts = (
   detail           => 10,
   max_report_width => 100,
   syslog_name      => 'amavis',
   timings          => 95,
   bayes            => 1,
   startinfo        => 1,

   # the list-valued options start out at their %Defaults values
   map { ($_ => $Defaults{$_}) }
      qw(timings_percentiles score_percentiles score_frequencies sarules),
);

# The amavis-logwatch.conf file is used only in standalone mode; the
# configuration variables it contains are applied before any command
# line options.
my $config_file = "/usr/local/etc/${progname_prefix}-logwatch.conf";

# Logwatch passes a filter's options via environment variables, so an
# empty or missing LOGWATCH_DETAIL_LEVEL means we are running standalone
# (w/out logwatch) and options come from the command line instead.
my $standalone = $ENV{LOGWATCH_DETAIL_LEVEL} eq '' ? 1 : 0;

unless ($standalone) {
   $Opts{'detail'} = $ENV{LOGWATCH_DETAIL_LEVEL};

   # Test the narrowest range first: every level below 5 is also below 10,
   # so checking "< 10" first would leave the "< 5" case unreachable and
   # the timings report would never be switched off.
   if ($Opts{'detail'} < 5) {
      $Opts{'startinfo'}        = 0;
      $Opts{'timings'}          = 0;
   } elsif ($Opts{'detail'} < 10) {
      $Opts{'startinfo'}        = 0;
   } else {
      # increase defaults for max detail in logwatch, subject to config file override
      $Opts{'timings'}          = 100;
      $Opts{'sarules'}          = 'all all';
   }
}

# Log line accumulators:
#   %Totals - section grand totals, for use in the Summary section
#   %Counts - per-level key totals
my %Totals;
my %Counts;

# The (chomped) log line currently being processed; used globally.
my $OrigLine;

my %Timings;
my @TimingsTotals;
my %SaveLine;         # text of a continued log line so far, keyed by amavis pid
my %UnmatchedList;
my %StartInfo;
my @SpamScores;       # scores ("Hits") gathered from Passed/Blocked lines
my %spamtags;         # last SPAM / SPAM-TAG signature seen, keyed by amavis pid

# Notes:
#
#   In REs, always use the /o option at the end of the RE when the RE
#   uses interpolated vars

# IPv4 only
#my $re_IP      = '(?:\d{1,3}\.){3}(?:\d{1,3})';

# IPv4 and IPv6 address pattern, built from its two alternatives.
# For the IPv6 syntax see RFC 2821 IPv6-address-literal,
# eg. IPv6:2001:630:d0:f102:230:48ff:fe77:96e
my $re_IP = '(?:' . join('|',
   # dotted quad, optionally preceded by "::" or "::ffff:"
   '(?:::(?:ffff:|FFFF:)?)?(?:\d{1,3}\.){3}\d{1,3}',
   # colon-separated hex groups, optionally tagged with "IPv6:"
   '(?:(?:IPv6:)?[\da-fA-F]{0,4}:){2}(?:[\da-fA-F]{0,4}:){0,5}[\da-fA-F]{0,4}',
) . ')';

# Forward declarations.  Declaring the prototypes up front lets calls made
# before the sub bodies are seen be parsed according to them; for example
# the (\%) prototype is what lets env_to_cmdline(%ENV) hand over a
# reference to the hash.  The definitions themselves are not in this part
# of the file (presumably further down).
sub usage($);
sub version($);
sub commify($);
sub inc_unmatched($ $);
sub get_vars_from_file($);
sub env_to_cmdline(\%);
sub buildTree(\% $ $);
sub printTree($ $);
sub printSummaryReport (\@);
sub printDetailReport (\@);
sub getpercentiles(\@ @);
sub getfrequencies(\@ @);
sub process_config_file($ $);
sub parse_vals($ $);
sub zero_opts( );
sub triway_opts($ $);

# Report printers, declared without a prototype.
sub printSpamScorePercentilesReport;
sub printSpamScoreFrequencyReport;
sub printSARulesReport;
sub printTimingsReport;
sub printStartupInfoReport;
sub strip_trace($);

# The Sections table below stores references to this hash element, so
# create it before the table is built.
$Totals{'TotalMsgs'} = 0;

#
# The Sections table drives reports.  For each entry in the table, a summary line and/or
# detailed report section is a candidate for output, depending upon logwatch Detail
# level, and .conf configuration variables.  Each entry below has up to four fields:
#
#   1: Key to %Counts and %Totals accumulator hashes
#   2: Row output format specifier: d=single integer, d1 or d2 indicates integer
#      in column 1 or 2 respectively, or Z to show both a unitized integer and its
#      comma-fied equivalent.
#   3: Summary and Section Title
#   4: (optional) A reference to the scalar holding the divisor used to calculate
#      the percentage of a total for that key
#
# Alternatively, when field 1 contains a single character, this character will
# cause a line filled with that character to be output, but only if there was
# output for that section.
# The special name '__SECTION' is used to indicate the beginning of a new section.
# This ensures the printSummaryReport and printDetailReport routines do not print
# needless horizontal lines.
#
my @Sections = (
   # Place configuration and critical errors first

   [ '__SECTION' ],
   [ 'Fatal',                   'd',  '*Fatal' ],
   [ 'Panic',                   'd',  '*Panic' ],
   [ 'WarningSecurity',         'd',  '*Warning: Security risk' ],
   [ 'AVTimeout',               'd',  '*Warning: Virus scanner timeout' ],
   [ 'AVConnectFailure',        'd',  '*Warning: Virus scanner connection failure' ],
   [ 'WarningSmtpShutdown',     'd',  '*Warning: SMTP shutdown' ],
   [ 'WarningSQL',              'd',  '*Warning: SQL problem' ],
   [ 'WarningAddressModified',  'd',  '*Warning: Email address modified' ],
   [ 'WarningNoQuarantineID',   'd',  '*Warning: Message missing X-Quarantine-ID header' ],
   [ 'Warning',                 'd',  'Miscellaneous warnings' ],
   [ '=' ],
   [ '\n' ],

   # Ham
   [ '__SECTION' ],
   [ 'CleanPassed',             'd1', 'Clean passed',                     \$Totals{'TotalMsgs'} ],
   [ 'CleanBlocked',            'd1', 'Clean blocked',                    \$Totals{'TotalMsgs'} ],
   [ 'BadHeaderPassed',         'd1', 'Bad header passed',                \$Totals{'TotalMsgs'} ],
   [ 'BadHeaderBlocked',        'd1', 'Bad header blocked',               \$Totals{'TotalMsgs'} ],
   [ 'TotalHams',               'd2', 'Ham',                              \$Totals{'TotalMsgs'} ],
   [ '\n' ],

   # Spam
   [ '__SECTION' ],
   [ 'SpamPassed',              'd1', 'Spam passed',                      \$Totals{'TotalMsgs'} ],
   [ 'SpamBlocked',             'd1', 'Spam blocked',                     \$Totals{'TotalMsgs'} ],
   [ 'SpamDiscarded',           'd1', 'Spam discarded (not quarantined)', \$Totals{'TotalMsgs'} ],
   [ 'TotalSpams',              'd2', 'Spam',                             \$Totals{'TotalMsgs'} ],
   [ '\n' ],

   # Malware
   [ '__SECTION' ],
   [ 'MalwarePassed',           'd1', 'Malware passed',                   \$Totals{'TotalMsgs'} ],
   [ 'MalwareBlocked',          'd1', 'Malware blocked',                  \$Totals{'TotalMsgs'} ],
   [ 'TotalMalware',            'd2', 'Malware',                          \$Totals{'TotalMsgs'} ],
   [ '\n' ],

   # Banned
   [ '__SECTION' ],
   [ 'BannedNamePassed',        'd1', 'Banned file name passed',          \$Totals{'TotalMsgs'} ],
   [ 'BannedNameBlocked',       'd1', 'Banned file name blocked',         \$Totals{'TotalMsgs'} ],
   [ 'TotalBanned',             'd2', 'Banned',                           \$Totals{'TotalMsgs'} ],
   [ '\n' ],

   # Unchecked
   [ '__SECTION' ],
   [ 'UncheckedPassed',         'd1', 'Unchecked passed',                 \$Totals{'TotalMsgs'} ],
   [ 'UncheckedBlocked',        'd1', 'Unchecked blocked',                \$Totals{'TotalMsgs'} ],
   [ 'TotalUnchecked',          'd2', 'Unchecked',                        \$Totals{'TotalMsgs'} ],
   [ '\n' ],

   # Other
   [ '__SECTION' ],
   [ 'MTABlocked',              'd1', 'MTA blocked',                      \$Totals{'TotalMsgs'} ],
   [ 'OversizedBlocked',        'd1', 'Oversized blocked',                \$Totals{'TotalMsgs'} ],
   [ 'TempfailPassed',          'd1', 'Tempfail passed',                  \$Totals{'TotalMsgs'} ],
   [ 'TempfailBlocked',         'd1', 'Tempfail blocked',                 \$Totals{'TotalMsgs'} ],
   [ 'OtherBlocked',            'd1', 'Other blocked',                    \$Totals{'TotalMsgs'} ],
   [ 'TotalOther',              'd2', 'Other',                            \$Totals{'TotalMsgs'} ],
   [ '\n' ],

   # Grand totals
   [ '__SECTION' ],
   [ 'TotalMsgs',               'd2', 'Total messages scanned',           \$Totals{'TotalMsgs'} ],
   [ 'BytesScanned',            'Z',  'Total bytes scanned' ],     # Z means print scaled as in 1k, 1m, etc.
   [ '=' ],
   [ '\n' ],

   # Supplemental counters; these rows carry no percentage divisor
   [ '__SECTION' ],
   [ 'SABypassed',              'd',  'SpamAssassin bypassed' ],
   [ 'SATimeout',               'd',  'SpamAssassin timeout' ],
   [ 'Released',                'd',  'Released from quarantine' ],
   [ 'TruncatedHeader',         'd',  'Truncated headers > 998 characters' ],
   [ 'SmtpResponse',            'd',  'SMTP response' ],
   [ 'BadAddress',              'd',  'Bad address syntax' ],
   [ 'FakeSender',              'd',  'Fake sender' ],
   [ 'ArchiveExtract',          'd',  'Archive extraction problem' ],
   [ 'DSNNotification',         'd',  'DSN notification (debug supplemental)' ],
   [ 'NoDSNSentBad',            'd',  'DSN not sent: bad DSN' ],
   [ 'NoDSNSentCutoff',         'd',  'DSN not sent: spam score > DSN cutoff' ],
   [ 'NoDSNSentFaked',          'd',  'DSN not sent: presumed bogus sender' ],
   [ 'NoSubject',               'd',  'Subject header inserted' ],
   [ 'Whitelisted',             'd',  'Whitelisted' ],
   [ 'Blacklisted',             'd',  'Blacklisted' ],
   [ 'TmpPreserved',            'd',  'Preserved temporary directory' ],
   [ 'DccError',                'd',  'DCC error' ],
   [ 'MimeError',               'd',  'MIME error' ],
   [ 'BadHeaderSupp',           'd',  'Bad header (debug supplemental)' ],
   [ 'FileOutputSkipped',       'd',  'File(1) output skipped' ],
   [ 'LocalDeliverySkipped',    'd',  'Local delivery skipped' ],
   [ 'ExtraModules',            'd',  'Extra code modules loaded at runtime' ],
   [ 'MalwareByScanner',        'd',  'Malware by scanner' ],
   [ 'ContentType',             'd',  'Content types' ],
   [ 'Bayes',                   'd',  'Bayes probability' ],
);

# Base Getopt::Long option list.  A spec followed by a code or scalar
# reference is handled through that reference; a spec standing alone has
# its value stored by GetOptions in the hash it is given (%Opts).
my @format_opts = (
   'help'                   => sub { print STDOUT usage(undef); exit 0 },
   'nodetail'               => \&zero_opts,
   'nosummary',
   'version'                => \&version,
   'debug',
   'detail=i',
   'max_report_width=i',
   'syslog_name=s',
   'config_file=s'          => \&process_config_file,
   'showstartinfo=i'        => \$Opts{'startinfo'},
   'startinfo!',
   'noscore_percentiles'    => \&triway_opts,
   'score_percentiles=s'    => \&triway_opts,
   'noscore_frequencies'    => \&triway_opts,
   'score_frequencies=s'    => \&triway_opts,
   'notimings'              => sub { $Opts{'timings'} = 0; },
   'timings=i',
   'timings_percentiles=s',
   'nosarules'              => \&triway_opts,
   'sarules=s'              => \&triway_opts,
);

# Extend the Getopts option list with one option per reportable key in
# the Sections list.  Such an option controls the max print level for
# that section.
for my $section (@Sections) {
   my $name = $section->[0];

   # Output formatting specifiers (a single fill character, the '\n'
   # marker, or a '__'-prefixed marker) get no option.
   next if $name =~ /^.$/
        or $name =~ /^\\n$/
        or $name =~ /^__/;

   my $key = lc $name;

   push @format_opts,
      "${key}=f",                          # float (integer.integer) level
      "no${key}", sub {$Opts{$key} = 0};   # --noOPT variant sets level to 0
}

# All options are placed into, and processed from ARGV.
# Most recently seen options override earlier options.
#
if ($standalone) {
   # In standalone mode, obtain options specified in the default
   # logwatch-style configuration file, but only if an alternate
   # configuration file wasn't specified via the command line
   # --config_file option (which will be processed during the call
   # to GetOptions below).
   #
   # Getopt::Long accepts unique abbreviations of an option name, in any
   # letter case, with one or two leading dashes and an optional "=value",
   # so every such spelling has to be recognized here.  "conf" is the
   # shortest prefix not shared with another option ("con" would also
   # fit --contenttype).
   my $explicit_config = grep {
      my ($name) = /^--?([^=]+)/;
      defined $name
         and length($name) >= 4
         and index('config_file', lc $name) == 0;
   } @ARGV;

   if (!$explicit_config and -f $config_file) {
      my $href = get_vars_from_file($config_file);
      #foreach (keys %$href) { print "V: $_, $href->{$_}\n"; };
      # Config file settings are put in front so that the command line
      # options, seen later, override them.
      unshift @ARGV, env_to_cmdline(%$href);
   }
} else {
   # logwatch passes all config vars via environment variables
   @ARGV = env_to_cmdline(%ENV);
}

#print "ARGC: ", scalar @ARGV, ", ARGV: @ARGV\n";
#$Getopt::Long::debug = 1;

# Parse everything now in @ARGV into %Opts; when parsing fails, point
# the user at --help and exit with status 1.
unless (GetOptions(\%Opts, @format_opts)) {
   print STDERR "Use $progname --help for options\n";
   exit 1;
}


#map { print "KEY: $_ => $Opts{$_}\n"}  keys %Opts;
#print "ARGC: ", scalar @ARGV, "ARGV: @ARGV\n";

# Main processing loop
#
while (<>) {
   my $p1 = $_;
   my ($p2, $pid);

   my $action = "Blocked";    # default action is blocked if not present in log

   chomp ($p1);
   $OrigLine = $p1;

   if ($standalone) {
      next unless $p1 =~ s/^... .. ..:..:.. (?:<[^>]+> )?\S+ $Opts{'syslog_name'}\[\d+\]: //;
   }

   # For now, ignore the amavis startup timing lines.  Need to do this
   # before stripping out the amavis pid to differentiate these from the
   # scan timing reports
   next if ($p1 =~ /^TIMING/);

   my $linelen = length $p1;
   # Strip amavis process id-instance id, or release id
   if (($pid,$p2) = ($p1 =~ /^\(([^)]+)\) (.*)$/ )) {
      $p1 = $p2;
   }

   # Handle continuation lines.  This assumes continuation lines
   # are in increasing order per PID, meaning line1, line2, line3,
   # but never line3, line1, line2.
   #
   # ... a continued line
   if ($p1 =~ /^\.\.\./) {
      if (!exists($SaveLine{$pid})) {
         #printf "Unexpected continue line: \"%s\"\n", $p1;
         $SaveLine{$pid} = '';
      }
      $p1 =~ /^\.\.\.(.*)$/;  $p1 = $SaveLine{$pid} . $1;
      $SaveLine{$pid} = $p1;
   }

   # this line continues ...
   #
   # Mark says...
   # The log lines as chopped by sub write_log are exactly 980 characters long starting with '(' as in:
   #  amavis[47061]: (47061-15) SPAM, etc  ...
   #                 ^ <-----980------------->
   if ($p1 =~ /\.\.\.$/ and $linelen == 980) {
      $p1 =~ /^(.*)\.\.\.$/;  $SaveLine{$pid} = $1;
      next;
   }

   if (exists($SaveLine{$pid})) {
      # printf "END OF SaveLine: %s\n", $SaveLine{$pid};
      $p1 = delete $SaveLine{$pid};
   }

   #if (length($p1) > 10000) {
   #   printf "Long log entry %d chars: \"%s\"\n", length($p1), $p1;
   #   next;
   #}

   #print "p1: \"$p1\"\n";

   next if (
       # We don't care about these
           ($p1 =~ /^do_ascii/) 
        or ($p1 =~ /^Found av scanner/) 
        or ($p1 =~ /^Found myself/)
        or ($p1 =~ /^Checking/)
        or ($p1 =~ /^(?:\(!+\))?(ESMTP|FWD|SEND) via /)       # log level 4
        or ($p1 =~ /^Not-Delivered/)
        or ($p1 =~ /^SpamControl/)
        or ($p1 =~ /^Perl/)
        or ($p1 =~ /^ESMTP/)
        or ($p1 =~ /^tempdir being removed/)
        or ($p1 =~ /^mail_via_smtp/)
        or ($p1 =~ /^local delivery: /)
        or ($p1 =~ /^do_notify_and_quarantine: .*ccat/)
        or ($p1 =~ /^cached [a-zA-Z0-9]+ /)
        or ($p1 =~ /^loaded policy bank/)
        or ($p1 =~ /^policy protocol: [^=]+=\S+(?:,\S+)*$/)   # allow "policy protocol: INVALID ..." later
        or ($p1 =~ /^p\.path/)
        or ($p1 =~ /^Requesting (a |)process rundown after [0-9]+ tasks/)
        or ($p1 =~ /^INFO: unfolded \d+ illegal all-whitespace continuation line/)
        or ($p1 =~ /^Cached (virus|spam) check expired/)
        or ($p1 =~ /^pr(?:esent|ovid)ing full original message to scanners as/)  # log level 2
        or ($p1 =~ /^Actual message size [0-9]+ B(,| greater than the) declared [0-9]+ B/)
        or ($p1 =~ /^disabling DSN/)
        or ($p1 =~ /^virus_scan: /)
        #or ($p1 =~ /^virus_scan: \(bad jpeg: Invalid marker segm len/)
        or ($p1 =~ /^Virus [^,]+ matches [^,]+, sender addr ignored/)
        or ($p1 =~ /^Not calling virus scanners, no files to scan in/)
        or ($p1 =~ /^lookup_ip_acl /)
        or ($p1 =~ /^lookup_acl/)
        or ($p1 =~ /^lookup .* does not match$/)
        or ($p1 =~ /^release /)
        or ($p1 =~ /^Waiting for the process \S+ to terminate/)
        or ($p1 =~ /^Valid PID file \(younger than sys uptime/)
        or ($p1 =~ /^Sending SIG\S+ to amavisd/)
        or ($p1 =~ /^Can't send SIG\S+ to process/)
        or ($p1 =~ /^killing process/)
        or ($p1 =~ /^no need to kill process/)
        or ($p1 =~ /^process .* is still alive/)
        or ($p1 =~ /^Daemon \[\d+\] terminated by SIG/)
        or ($p1 =~ /^TIMING.*got data/)    # skip amavis release timing
        or ($p1 =~ /^OS_fingerprint: /)
        or ($p1 =~ /^run_as_subprocess: child process \S*: Broken pipe/)
        or ($p1 =~ /^Sophie broken pipe \(don't worry\), retrying/)
        or ($p1 =~ /^penpals: (bonus|prev Subject:|this Subject:) /)
        or ($p1 =~ /^adding SA score \S+ to existing/)
        or ($p1 =~ /^Turning AV infection into a spam report:/)
        or ($p1 =~ /^The amavisd daemon is already running/)
        or ($p1 =~ /^parse_message_id/)
        or ($p1 =~ /^AUTH not needed/)
        or ($p1 =~ /^Using [^:]+: \(built-in interface\)/)
        or ($p1 =~ /^load: \d+ %, total idle/)
        or ($p1 =~ /^process_request:/)
        or ($p1 =~ /^NOTICE: Disconnected from SQL server/) # redundant
        or ($p1 =~ /^Maia:/)   # redundant
        or ($p1 =~ /^storage and lookups will use the same connection to SQL/)
        or ($p1 =~ /^switch_to_client_time/)
        or ($p1 =~ /^parse_received: /)
        or ($p1 =~ /^fish_out_ip_from_received: /)
        or ($p1 =~ /^idle_proc, /)
        or ($p1 =~ /^switch_to_my_time/)
        or ($p1 =~ /^TempDir::strip: /)
        or ($p1 =~ /^rmdir_recursively/)
        or ($p1 =~ /^sending SMTP response: /)
        or ($p1 =~ /^exiting process_request/)
        or ($p1 =~ /^prolong_timer/)
        or ($p1 =~ /^post_process_request_hook: /)
        or ($p1 =~ /^SMTP session over/)
        or ($p1 =~ /^updating snmp variables/)
        or ($p1 =~ /^best_try_originator_ip/)
        or ($p1 =~ /^mail checking ended: /) # log level 2
        or ($p1 =~ /^save_info_preliminary/) # log level 4
        or ($p1 =~ /^save_info_final/)       # log level 4
        or ($p1 =~ /^AM\.PDP  /) # this appears to be always two spaces
                                 # because in amavisd::preprocess_policy_query() when $ampdp is
                                 # set, it will pass an unset $attr_ref->{'mail_id'} to do_log(1
        or ($p1 =~ /^sql_storage: retrying/)
        or ($p1 =~ /^CALLING SA check$/)
        or ($p1 =~ /^timer set to \d+/)
        or ($p1 =~ /^calling SA parse,/)
        or ($p1 =~ /^lookup_hash/)
        or ($p1 =~ /^lookup_re/)
        or ($p1 =~ /^query_keys/)
        or ($p1 =~ /^find_or_save_addr: /)
        or ($p1 =~ /^header: /)
        or ($p1 =~ /^DO_QUARANTINE, /)
        or ($p1 =~ /^DEBUG_ONESHOT: /)
        or ($p1 =~ /^TempDir::/)
        or ($p1 =~ /^check_mail_begin_task: /)

        # non-begin anchored
        or ($p1 =~ /email\.txt no longer exists, can't re-use it/)
        or ($p1 =~ /SPAM\.TAG2/)
        or ($p1 =~ /BAD-HEADER\.TAG2/)
        or ($p1 =~ /: Connecting to socket/)
        or ($p1 =~ /broken pipe \(don't worry\), retrying/)
   );

   my ($ip, $from, $to, $key, $hits, $reason, $item, $decoder, $scanner, $malware, $stage);

   # Coerce older "INFECTED" quarantined lines into "Blocked INFECTED",
   # to be processed in the Passed/Blocked section.
   if ($p1 =~ /^INFECTED.*, quarantine/) {
      $p1 = 'Blocked ' . $p1;
   }

   # SPAM entry occurs at kill level
   # SPAM-TAG entry occurs at log level 2, when spam header is inserted
   # log_level >= 2 || (log_level > 2 && syslog_priority=debug)
   my ($tagtype,$fromto,$isspam,$tags,$tests,$autolearn);
   if (($tagtype,$fromto,$isspam,$tags,$tests,$autolearn) = ($p1 =~ /^(SPAM(?:-TAG)?), (.*), (Yes|No), score=[-+x\d.]+(.*) tests=\[([^\]]*)](?:, autolearn=(\w+))?/) or
       ($tagtype,$fromto,$isspam,$tags,$tests) =            ($p1 =~ /^(SPAM(?:-TAG)?), (.*), (Yes|No), hits=[-+x\d.]+(.*) tests=(.*), quarantine /)) {

      #TD SPAM, <from@example.com> -> <to@sample.com>, Yes, score=17.709 tag=-10 tag2=6.31 kill=6.31 tests=[AWL=-0.678, BAYES_99=4], autolearn=spam, quarantine Cc4+GUJhgpqh (spam-quarantine)
      #TD SPAM, <from@example.com> -> <to@sample.net>, Yes, score=21.161 tag=x tag2=8.15 kill=8.15 tests=[BAYES_99=2.5, FORGED_RCVD_HELO=0.135], autolearn=no, quarantine m6lWPoTGJ2O (spam-quarantine)
      #TD SPAM, <from@example.com> -> <to@sample.net>, Yes, score=17.887 tag=-10 tag2=6.31 kill=6.31 tests=[BAYES_99=4], autolearn=spam, quarantine VFYjDOVTW4zd (spam-quarantine)
      #TD SPAM-TAG, <from@example.com> -> <to@sample.net>, No, score=-0.069 tagged_above=-10 required=6.31 tests=[BAYES_00=-2.599, FROM_ENDS_IN_NUMS=2.53]
      #TD SPAM-TAG, <from@example.com> -> <to@sample.net>, No, score=-1.294 required=8.15 tests=[BAYES_00=-2.599, FROM_LOCAL_HEX=1.305]
      # amavisd-new-20030616
      #TD SPAM, <from@example.com> -> <to@sample.net>, Yes, hits=8.1 tag1=-999.0 tag2=7.0 kill=7.0 tests=MANGLED_TAKE, UPPERCASE_25_50, quarantine spam-14156-09 (maia-spam-quarantine) 

      if ($tests) {
         my $type = $isspam =~ /^Y/ ? 'Spam' : 'Ham';

         # Note: A SPAM line may be followed by an almost identical SPAM-TAG line.  To avoid double counting,
         # maintain a list of (abbreviated) SPAM tag lines keyed by pid.  Since pid's are recycled,
         # maintain an approximation of uniqueness by combining several components from the log
         # line (we can't use the date information, as in logwatch, it is not present).
         # XXX: It is safe to delete an entry when the final Passed/Block line occurs

         #TD SPAM, <from@example.com> -> <to@sample.net>, Yes, score=34.939 tag=x tag2=6.31 kill=6.31 tests=[DATE_IN_FUTURE_03_06=1.961], autolearn=disabled
         #TD SPAM-TAG, <from@example.com> -> <to@sample.net>, Yes, score=34.939 required=6.31 tests=[DATE_IN_FUTURE_03_06=1.961]

         my $tagstr = $fromto . '/' . $isspam . '/' . $tests;
         if ($tagtype eq 'SPAM-TAG' and exists $spamtags{$pid}) {
            next if ($spamtags{$pid} eq $tagstr);
         }
         $spamtags{$pid} = $tagstr;

         #for (split /=[^,]+(?:, +|$)/, $tests) 
         for (split /, +/, $tests) {
            my ($id,$val) = split /=/;
            $val = 0 if ($id eq 'DKIM_POLICY_SIGNSOME');
            $val = '-'    if ($id eq 'AWL');
            $Counts{'SArules'}{$type}{sprintf "%6s %s", $val,$id}++;
            $Counts{'Bayes'}{$id}++   if ($id =~ /^BAYES_\d+$/);
         }
         #autolearn= is available only at ll>=3 or SPAM messages; so ham should never occur here?
         #no, ham, spam, unavailable
         #$Counts{'Autolearn'}{$type}{$_}
      }
   }
   # Passed or Blocked
   elsif (($action) = ($p1 =~ /^(Passed|Blocked)(.*)/ )) {
      ($p1 = $2) =~ s/^\s+//;
=pod
       2.3.1
       <>,<info@example.com>,Passed,Hits=-3.3,Message-ID=<200506440.1.sample.net>,Size=51458 

       20030616p10-5
       Not-Delivered, <from@example.com> -> <to@localhost>, quarantine spam-ea32770-03, Message-ID: <BAA618FE2CB585@localhost>, Hits: 9.687 
=cut
      # CleanPassed, CleanBlocked
      if (($ip, $from, $to, $hits) = ($p1 =~ /^(?:CLEAN)?,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.*?)[)>],.*Hits: ([-+.\d]+)/o)) {
         #TD Passed CLEAN, [207.7.156.4] [207.7.156.4] <from@example.com> -> <to@sample.net>, Message-ID: <2qxz191@example.com>, mail_id: w4DHD8, Hits: -2.599, size: 3045, queued_as: 2056, 2664 ms
         # amavis 20030616p10-5
         #TD Passed, <from@example.com> -> <to@sample.net>, Message-ID: <652.44494541@example.com>, Hits: 4.377
         #TD Passed, <from@example.com> -> <to@sample.net>, Message-ID: <B5C@example.com>, Hits: - 
         if ($hits ne '-') {
            if ($hits =~ /^-?[.\d]+[-\+][.\d]+$/) {
               $hits = eval $hits;
            }
            push @SpamScores, $hits;
         }
         else {
            $Totals{'SABypassed'}++;
         }
         $Totals{"Clean$action"}++;
         if ($action eq 'Blocked') {
            $from = '<>' if ($from eq '');
            $ip = '*unknown IP' if ($ip eq '');
            $Counts{"Clean$action"}{"\L$to"}{$ip}{$from}++
         }
      }

      # SpamPassed, SpamBlocked
      elsif (($ip, $from, $to, $hits) = ( $p1 =~ /^SPAM(?:MY)?,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.*?)[)>],.*Hits: ([-+.\d]+),/o )) {
         #TD Blocked SPAM, [10.0.0.1] [192.168.0.1] <bogus@example.com> -> <to@sample.net>, quarantine: spam-EzEbE9W, Message-ID: <117894@example.com>, mail_id: EzEbE9W, Hits: 6.364, size: 16493, 6292 ms
         #TD Blocked SPAM, LOCAL [10.0.0.1] [10.0.0.2] <bogus@example.com> -> <to@sample.net>, quarantine: spam-EzEbE9W, Message-ID: <110394@example.com>, mail_id: EzEbE9W, Hits: 6.364, size: 16493, 6292 ms
         #TD Blocked SPAM, [IPv6:2001:630:d0:f102:230:48ff:fe77:96e] [192.168.0.1] <joe@example.com> -> <user@sample.net>, quarantine: spam-EzEbE9W, Message-ID: <11780394@example.com>, mail_id: EzEbE9W, Hits: 6.364, size: 16493, 6292 ms
         #TD Passed SPAMMY, ORIGINATING/MYNETS LOCAL [10.0.0.1] [10.0.0.1] <from@example.com> -> <to1@sample.net>,<to2@sample.net>, quarantine: spam-EzEbE9W, Message-ID: <11780394@example.com>, mail_id: EzEbE9W, Hits: 6.364, size: 16493, 6292 ms
         #TD Blocked SPAM, B-BANK/C-BANK/B-BANK [10.0.0.1] [10.0.0.1] <from@sample.net> -> <to@example.com>, quarantine: spam-EzEbE9W, Message-ID: <11780394@example.com>, mail_id: EzEbE9W, Hits: 6.364, size: 16493, 6292 ms
         #TD Blocked SPAM, [10.0.0.1] [10.0.0.1] <from@example.com> -> <to@sample.net>, quarantine: spam-AV49p5, Message-ID: <1.007@sample.net>, mail_id: AV49p5, Hits: 7.487, size: 27174, 4406 ms
         #TD Passed SPAM, MYNETS <root@example.com> -> <root@example.com>, quarantine: spam-V3Wq, Message-ID: <220.1B@example.com>, mail_id: V3Wq, Hits: 7, size: 8838, queued_as: C63EC, 18 ms
         #TD Passed SPAM, <> -> <"kevin).danby"@domain.tld>, Message-ID: <200801180104.CAA23669@aserver.sub.adomain.tld>, mail_id: 6AzQ1g0l5RgP, Hits: 9.061, size: 5555, queued_as: C1840506CB8, 8766 ms

         # XXX can null IPs occur? they shouldn't...
         # print "Spam$action: ip: \"$ip\", From: \"$from\", To: \"$to\"\n";

         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');

         if ($hits ne '-') {
            if ($hits =~ /^(-?[.\d]+[-+][.\d]+)$/) {
               $hits = eval $hits;
            }
            push @SpamScores, $hits;
         }
         else {
            $Totals{'SABypassed'}++;
         }

         $Totals{"Spam$action"}++;
         #$Counts{"Spam$action"}{$ip}{"\L$to"}{$from}++;
         # XXX make this runtime dynamic based on config
         # uncomment to group by To rather than ip
         $Counts{"Spam$action"}{"\L$to"}{$ip}{$from}++;
      }

      # MalwarePassed, MalwareBlocked
=pod
   xxx very old
      Virus found - quarantined|
   amavisd-new-20030616
      INFECTED (JS/IllWill-A), <from@[127.0.0.1]> -> <to@sample.net>, quarantine virus-20040811-207-0-03, Message-ID: <0440.5577-101@sample.net>, Hits: -
      INFECTED (Exploit.HTML.IFrame, Worm.SomeFool.P), <from@sample.net> -> <to@example.com>,<to2@example.com>, quarantine qiO2ZG4K, Message-ID: <200608.5A5@mail.example.com>, Hits: -

      xxx (?:(Passed|Blocked) )?INFECTED \(([^\)]+)\),[A-Z .]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(]([^>)]*)[>)] -> [(<]([^(<]+)[(>]/o ))
=cut
      #XXX elsif (($action, $key, $ip, $from, $to) = ( $p1 =~ /^(?:Virus found - quarantined|(?:(Passed|Blocked) )?INFECTED) \(([^\)]+)\),[A-Z .]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(]([^>)]*)[>)] -> [(<]([^(<]+)[(>]/o ))
      elsif (($key, $ip, $from, $to) = ( $p1 =~ /^INFECTED \(([^\)]+)\),[A-Z .]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.+?)[(>]/o )) {
         #TD Blocked INFECTED (HTML.Phishing.Bank-43), [198.168.0.1] [10.0.0.1] <bogus@example.com> -> <to@sample.net>, 
         #TD Blocked INFECTED (Trojan.Downloader.Small-9993), LOCAL [10.0.0.2] [10.0.0.2] <bogus@example.net> -> <to@example.com>, 

         # print "Key: \"$key\", ip: \"$ip\", From: \"$from\", To: \"$to\"\n";

         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"Malware$action"}++;
         $Counts{"Malware$action"}{$key}{"\L$to"}{$ip}{$from}++;
      }

      # BannedNamePassed, BannedNameBlocked
      #XXX elsif (($action, $item, $ip, $from, $to) = ( $p1 =~ /^(?:(Blocked|Passed) )?BANNED (?:name\/type )?\((.+)\),[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(]([^>)]*)[>)] -> [(<]([^(<]+)[(>]/o)) 
      elsif (($item, $ip, $from, $to) = ( $p1 =~ /^BANNED (?:name\/type )?\((.+)\),[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.+?)[(>]/o)) {
         # the first IP is the envelope sender.
         #TD Blocked BANNED (multipart/report | message/partial,.txt), [192.168.0.1] [10.0.0.2] <> -> <someuser@sample.net>
         #TD Blocked BANNED (multipart/report | message/partial,.txt), LOCAL [192.168.0.1] [10.0.0.2] <> -> <someuser@sample.net>
         #TD Blocked BANNED (multipart/mixed | application/octet-stream,.asc,=?iso-8859-1?Q?FTP=5FFile=5F (1)=File(1).reg), [192.168.0.0] [192.168.0.0] <from@example.com> -> <to@sample.us>, 
         # print "Item: \"$item\", ip: \"$ip\", From: \"$from\", To: \"$to\"\n";

         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"BannedName$action"}++;
         $Counts{"BannedName$action"}{"\L$to"}{$item}{$ip}{$from}++;
      }

      # BadHeaderPassed, BadHeaderBlocked
=pod
      #XXX elsif (($action, $ip, $from, $to) = ( $p1 =~ /^(?:(Blocked|Passed) )?BAD-HEADER,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [(<]([^>)]*)[)>](?: -> [(<]([^>)]+)[)>])[^:]*/o ))
       amavis 2.3.1
       BAD-HEADER, <> -> <info@example.com>, Message-ID: <200506440.1.sample.net>, Hits=-3.3 tag1=3.0 tag2=7.5 kill=7.5, tests=ALL_TRUSTED=-3.3, [10.0.0.1] 
=cut
      elsif (($ip, $from, $to) = ( $p1 =~ /^BAD-HEADER,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [(<](.*?)[)>](?: -> [(<](.+?)[)>])[^:]*/o )) {
         #TD Passed BAD-HEADER, [192.168.0.1] [10.0.0.2] <bogus@example.com> -> <someuser@sample.net>
         #TD Passed BAD-HEADER, LOCAL [192.168.0.1] [10.0.0.2] <bogus@example.com> -> <someuser@sample.net>
         #TD Passed BAD-HEADER, MYNETS AM.PDP [127.0.0.1] [127.0.0.1] <bogus@example.com> -> <someuser@sample.net>
         #TD Passed BAD-HEADER, ORIGINATING/MYNETS LOCAL [10.0.0.1] [10.0.0.1] <from@sample.net> -> <to1@sample.net>,<to2@sample.net>,<to3@example.com>, 
         #TD Passed BAD-HEADER, [10.0.0.1] [10.0.0.2] <from@example.com> -> <to@sample.net>, quarantine: badh-lxR, Message-ID: <7fm@example.com>, mail_id: lxR, Hits: -2.292, size: 422, queued_as: E3B, 981 ms
         # print "Bad Header: ip: \"$ip\", From: \"$from\", To: \"$to\"\n";

         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"BadHeader$action"}++;
         $Counts{"BadHeader$action"}{"\L$to"}{$ip}{$from}++;
      }

      # UncheckedPassed, UncheckedBlocked
      #XXXX  elsif (($action, $ip, $from, $to) = ( $p1 =~ /^(?:(Passed|Blocked) )?UNCHECKED,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(]([^>)]*)[>)] -> [(<]([^>)]*)[)>]/o ))
      elsif (($ip, $from, $to) = ( $p1 =~ /^UNCHECKED,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.*?)[)>]/o )) {
         #TD Passed UNCHECKED, MYNETS LOCAL [192.168.0.1] [192.168.0.1] <from@sample.net> -> <to@example.com> Message-ID: <002e01c759c7$5de437b0$0a02a8c0@somehost>, mail_id: 7vtR-7BAvHZV, Hits: -, queued_as: B5420C2E10, 6585 ms

         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"Unchecked$action"}++;
         $Counts{"Unchecked$action"}{"\L$to"}{$ip}{$from}++;
      }

      elsif (($ip, $from, $to) = ( $p1 =~ /^MTA-BLOCKED,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.*?)[)>]/o )) {
         #TD Blocked MTA-BLOCKED, LOCAL [192.168.0.1] [192.168.0.2] <from@example.com> -> <to@sample.net>, Message-ID: <438548@example.com>, mail_id: tfgTCiyvFw, Hits: -2.54, size: 4895, 31758 ms
         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"MTA$action"}++;
         $Counts{"MTA$action"}{"\L$to"}{$ip}{$from}++;
      }

      elsif (($ip, $from, $to) = ( $p1 =~ /^OVERSIZED,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.*?)[)>]/o )) {
         #TD Blocked OVERSIZED, LOCAL [10.0.0.1] [10.0.0.1] <f@example.com> -> <t@sample.net>, Message-ID: <435@example.com>, mail_id: tfTivFw, Hits: -2.54, size: 444444895, 31758 ms
         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"Oversized$action"}++;
         $Counts{"Oversized$action"}{"\L$to"}{$ip}{$from}++;
      }

      elsif (($ip, $from, $to) = ( $p1 =~ /^OTHER,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.*?)[)>]/o )) {
         #TD Blocked OTHER, LOCAL [10.0.0.1] [10.0.0.1] <f@example.com> -> <t@sample.net>, Message-ID: <435@example.com>, mail_id: tfTivFw, Hits: -2.54, size: 495, 31758 ms
         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"Other$action"}++;
         $Counts{"Other$action"}{"\L$to"}{$ip}{$from}++;
      }

      # TempFailPassed, TempFailBlocked
      #XXX elsif (($action, $ip, $from, $to) = ( $p1 =~ /^(?:(Passed|Blocked) )?TEMPFAIL,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(]([^>)]*)[>)] -> [(<]([^>)]*)[)>]/o ))
      elsif (($ip, $from, $to) = ( $p1 =~ /^TEMPFAIL,[^[]*(?: \[($re_IP)\])?(?: \[$re_IP\])* [<(](.*?)[>)] -> [(<](.*?)[)>]/o )) {
      #TD Blocked TEMPFAIL, [10.0.0.2] [10.0.0.1] <user@example.com> -> <to@sample.net>, Message-ID: <200703302301.9f1899470@example.com>, mail_id: bgf52ZCNbPo, Hits: -2.586, 3908 ms

         $from = '<>' if ($from eq '');
         $ip = '*unknown IP' if ($ip eq '');
         $Totals{"Tempfail$action"}++;
         $Counts{"Tempfail$action"}{"\L$to"}{$ip}{$from}++;
      }
      else {
         inc_unmatched('passblock', $OrigLine);
         next;
      }
   } # end Passed or Blocked

   # MAIA
   elsif ($p1 =~ /^FAKE SENDER, ([^:]+): ($re_IP), (.*)$/) {
      #TD FAKE SENDER, SPAM: 192.168.0.1, bogus@example.com
      # Tally the report under its leading label, then the IP, then the trailing address text.
      my ($label, $addr_ip, $rest) = ($1, $2, $3);
      $Counts{'FakeSender'}{$label}{$addr_ip}{$rest} += 1;
      $Totals{'FakeSender'} += 1;
   }

   elsif ($p1 =~ /^p\d+ \d+(?:\/\d+)* Content-Type: ([^,]+)(?:, size: [^,]+, name: (.*))?/) {
      # Per-part Content-Type line: count parts by type, subtype and the value of the name field.
      #TD p006 1 Content-Type: multipart/mixed
      #TD p008 1/1 Content-Type: multipart/signed
      #TD p001 1/1/1 Content-Type: text/plain, size: 460 B, name: 
      #TD p002 1/1/2 Content-Type: application/pgp-signature, size: 189 B, name: 
      #TD p002 1/2 Content-Type: application/octet-stream, size: 3045836 B, name: abc.pdf

      # Copy the captures immediately so no later pattern match can disturb them.
      my ($ctype, $name) = ($1, $2);

      # A Content-Type containing "" is reported as unspecified/unspecified.
      my ($type, $subtype) = $ctype !~ /""/ ? split /\//, $ctype : ('unspecified', 'unspecified');

      # Absent and whitespace-only names share the '' key.  Counting them directly on the
      # subtype node would mix a scalar counter with a hash at the same key, which either
      # dies under strict refs or overwrites the per-name hash on a later line.
      $name = '' if (!defined $name or $name =~ /^\s+$/);

      #print "$OrigLine\n";
      $Counts{'ContentType'}{$type}{$subtype}{$name}++;
   }

   # LMTP/SMTP connection
   elsif (my ($size) = ($p1 =~ /^[LS]MTP:(?:\[$re_IP\])?:\d+ .* SIZE=(\d+) / )) {
     #TD LMTP::10024 /var/spool/amavis/tmp/amavis-20070119T144757-09086: <from@example.com> -> <to@sample.net> SIZE=1000 Received: from mail.sample.net ([127.0.0.1]) by localhost (mail.sample.net [127.0.0.1]) (amavisd-new, port 10024) with LMTP for <to@sample.net>; Fri, 19 Jan 2007 15:41:45 -0800 (PST)
     #TD SMTP:[127.0.0.1]:10024 /var/spool/amavis/tmp/amavis-20070119T144757-09086: <from@example.com> -> <to@sample.net> SIZE=2500000 Received: from mail.sample.net ([127.0.0.1]) by localhost (mail.sample.net [127.0.0.1]) (amavisd-new, port 10024) with LMTP for <to@sample.net>; Fri, 19 Jan 2007 15:41:45 -0800 (PST)
     $Totals{'BytesScanned'} += $size;
   }

   elsif ( ($reason) = ( $p1 =~ /^BAD HEADER from [^:]+: (.+)$/ ) or
           ($reason) = ( $p1 =~ /check_header: \d, (.+)$/ ) ) {
      # Supplemental header / MIME violation details (logged when log_level > 1).
      # The text is split into a general reason and a specific sub-reason so that
      # similar violations are grouped together in %Counts.

      # amavisd < 2.4.0, log_level >= 1
      #TD BAD HEADER from <bogus@example.com>: Improper use of control character (char 0D hex) in message header 'Received': Received: example.com[10.0.0.1\r]
      #TD BAD HEADER from <bogus@example.com>: MIME error: error: part did not end with expected boundary
      #TD BAD HEADER from <bogus@example.com>: Non-encoded 8-bit data (char F7 hex) in message header 'Subject': Subject: \367\345\370\361 \344\351\351\362\345\365\n    
      #TD BAD HEADER from (bulk ) <bogus@bounces@lists.example.com>: Non-encoded 8-bit data (char E6 hex) in message header 'Subject': Subject: spam\\346ham\\n 
      #TD BAD HEADER from (list) <bogus@bounces@lists.example.com>: MIME error: error: part did not end with expected boundary
      #  amavisd >= 2.4.3, log_level >= 2
      #TD check_header: 2, Non-encoded 8-bit data (char AE hex): Subject: RegionsNet\\256 Online Banking\\n
      #TD check_header: 2, Non-encoded 8-bit data (char E1 hex): From: "any user" <from\\341k@example.com>\\n
      #TD check_header: 8, Duplicate header field: "Reply-To"
      #TD check_header: 8, Duplicate header field: "Subject"
      #TD check_header: 4, Improper folded header field made up entirely of whitespace (char 09 hex): X-Loop-Detect: 3\\n\\t\\n
      #TD check_header: 4, Improper folded header field made up entirely of whitespace: Received: ...8 ;         Thu, 10 Jan 2008 03:41:35 +0100\\n\\t \\n

      my ($subreason, @parts);

      # Three-part forms: reason, offending character, remaining text.
      if (@parts = ($reason =~ /^(Non-encoded 8-bit data) \((char \S+ hex)\): (.*)$/)) {
         ($reason, $subreason) = ($parts[0], "$parts[1]: $parts[2]");
      }
      elsif (@parts = ($reason =~ /^(Improper use of control character|Non-encoded 8-bit data) \((char \S+ hex)\) in \S+ header [^:]+: (.+)$/)) {
         ($reason, $subreason) = ($parts[0], "$parts[1]: $parts[2]");
      }
      # Two-part forms: reason and detail, tried in the same order as before.
      elsif (@parts = ($reason =~ /^(Improper folded header field made up entirely of whitespace):? (.*)/)
          or @parts = ($reason =~ /^(Duplicate header field): "(.+)"$/)
          or @parts = ($reason =~ /^(MIME error): (?:error: )?(.+)$/)) {
         ($reason, $subreason) = @parts;
      }
      # Anything else keeps the full text as the reason, with no sub-reason.

      $Counts{'BadHeaderSupp'}{$reason}{$subreason}++;
      $Totals{'BadHeaderSupp'}++;
   }

   elsif ( $p1 =~ /: spam level exceeds quarantine cutoff level/ ) {
      #TD do_notify_and_quarantine: spam level exceeds quarantine cutoff level 20
      $Totals{'SpamDiscarded'}++;
   }

   elsif ( $p1 =~ /^spam_scan: (.*)$/) {
      if ($1 =~ /^not wasting time on SA, message longer than/ ) {
         #TD spam_scan: not wasting time on SA, message longer than 409600 bytes: 1326+4115601
         $Totals{'SABypassed'}++;
      }
      # ignore other spam_scan lines
   }

   # WARN:
   elsif ( ($reason) = ( $p1 =~ /^WARN: MIME::Parser error: (.*)$/ )) {
      # WARN: MIME::Parser error: unexpected end of header
      $Totals{'MimeError'}++;
      $Counts{'MimeError'}{$reason}++;
   }

   elsif ($p1 =~ /^WARN: address modified \((\w+)\): <(.*?)> -> <(.*)>$/) {
      #TD WARN: address modified (sender): <root> -> <root@>
      #TD WARN: address modified (recip): <root> -> <root@>
      #TD WARN: address modified (recip): <postmaster> -> <postmaster@>
      #TD WARN: address modified (recip): <"test@example.com"@> -> <"teszt@example.com">
      #TD WARN: address modified (sender): <fr\344om@sample.net> -> <"fr\344om"@sample.net>
      $Totals{'WarningAddressModified'}++;
      $Counts{'WarningAddressModified'}{$1 eq 'sender' ? "Sender address" : "Recipient address"}{"$2 -> $3"}++;
   }

   # NOTICE:
   elsif ($p1 =~ /^NOTICE: (.*)$/) {
      if ($1 =~ /^Not sending DSN, spam level ([\d.]+ )?exceeds DSN cutoff level/ ) {
         $Totals{'NoDSNSentCutoff'}++;
      }
      elsif ($1 =~ /^Not sending DSN to believed-to-be-faked sender/ ) {
         $Totals{'NoDSNSentFaked'}++;
      }
      elsif ($1 =~ /^DSN contains [^;]+; bounce is not bounc[ai]ble, mail intentionally dropped/ ) {
         $Totals{'NoDSNSentBad'}++;
      }
      elsif ($1 =~ /^Skipping (?:bad|extra) output from file\(1\)/ ) {
         #TD NOTICE: Skipping extra output from file(1): blah
         #TD NOTICE: Skipping bad output from file(1) at [1, p002], got: blah
         $Totals{'FileOutputSkipped'}++;
      }
      else {
         inc_unmatched('NOTICE', $OrigLine);
         next;
      }
   }

   # INFO:
   elsif ($p1 =~ /^INFO: (.*)$/) {
      if ($1 =~ /^truncat(ed|ing)/) {
         #TD INFO: truncating long header field (len=2639): X-Spam-Report: =?iso-8859-1?Q?=0A=0A*__1=2E7_SUBJECT=5FENCODED=5FTWICE_Subject=3A_MIME_e?= =?iso-885...
         #TD INFO: truncated 1 header line(s) longer than 998 characters
         $Totals{'TruncatedHeader'}++;
      } elsif ( $1 =~ /^no existing header field 'Subject', inserting it/ ) {
         $Totals{'NoSubject'}++;
      }
      elsif (my ($savers1, $savers2, $item) = ( $1 =~ /^(?:SA version: ([^,]+), ([^,]+), )?no optional modules: (.+)$/ )) {
         #TD INFO: SA version: 3.1.8, 3.001008, no optional modules: DBD::mysql Mail::SpamAssassin::Plugin::DKIM Mail::SpamAssassin::Plugin::URIDetail Error
         if ($savers1 !~ /^$/) {
            $StartInfo{'sa_version'} = "$savers1 ($savers2)";
         }
         foreach my $code (split / /, $item) {
            $StartInfo{'Code'}{'Not loaded'}{$code} = "";
         }
      }
      else {
         inc_unmatched('INFO', $OrigLine);
         next;
      }
   }

   elsif ( ($action,$reason,$from,$to) = ($p1 =~ /^DSN: NOTIFICATION: Action:([^,]+), ([^,]+), <(.*?)> -> <(.*?)>/ )) {
      #TD DSN: NOTIFICATION: Action:failed, LOCAL 554 Banned, <from@example.net> -> <to@example.com>
      #TD DSN: NOTIFICATION: Action:delayed, LOCAL 454 Banned, <from@example.com> -> <to@example.net>

      $Totals{'DSNNotification'}++;
      $Counts{'DSNNotification'}{$action}{$reason}{"$from -> $to"}++;
   }

   elsif (($item, $from, $to) = ( $p1 =~ /^Quarantined message release: ([^ ]+) <(.*?)> -> (.+)$/ ) or
          ($item, $from, $to) = ( $p1 =~ /^Quarantine release ([^ ]+): overriding recips <([^>]*)> by (.+)$/ )) {
      #TD Quarantined message release: hiyPJOsD2m9Z <from@sample.net> -> <to@example.com>
      #TD Quarantined message release: hiyPJOsD2m9Z <> -> <to@recipient.maildir>,<anyone@example.com>
      #TD Quarantine release arQcr95dNHaW: overriding recips <TO@EXAMPLE.COM> by <to@example.com>

      # An empty second capture is shown as '<>'; angle brackets are removed from the third.
      if ($from eq '') {
         $from = '<>';
      }
      $to =~ tr/<>//d;

      # Keyed by lower-cased $from, then $to, then the release id.
      $Counts{'Released'}{lc($from)}{$to}{$item}++;
      $Totals{'Released'}++;
   }
   elsif ($p1 =~ /^Quarantine release ([^:]+): missing X-Quarantine-ID$/) {
      #TD Quarantine release 7ejEBC7MThSc: missing X-Quarantine-ID
      $Totals{'WarningNoQuarantineID'}++;
      $Counts{'WarningNoQuarantineID'}{$1}++;
   }

   elsif ( ($stage,$reason) = ($p1 =~ /^Negative SMTP resp[.]? to ([^:]+): *(.*)$/ )) {
      $Totals{'SmtpResponse'}++;
      $Counts{'SmtpResponse'}{'Negative response'}{$stage}{$reason}++;
   }
   elsif ( ($stage,$reason) = ($p1 =~ /^smtp resp to ([^:]+): *(.*)$/ )) {
      $Totals{'SmtpResponse'}++;
      $Counts{'SmtpResponse'}{'Response'}{$stage}{$reason}++;
   }

   elsif ( ($item) = ($p1 =~ /^response to RCPT TO for <([^>]*)>: "501 Bad address syntax"/ )) {
      #TD response to RCPT TO for <""@example.com>: "501 Bad address syntax"
      $Totals{'BadAddress'}++;
      $Counts{'BadAddress'}{$item}++;
   }

   # do_unzip: archive extraction
   elsif ($p1 =~ s/^do_unzip: \S+, //) {
      $Totals{'ArchiveExtract'}++;

      if ( $p1 =~ s/^\d+ members are encrypted, // ) {
         #TD do_unzip: p003, 4 members are encrypted, none extracted, archive retained
         $Counts{'ArchiveExtract'}{'Encrypted'}{$p1}++;

      } elsif ( $p1 =~ /^zero length members, archive retained/ ) {
         #TD do_unzip: p002, zero length members, archive retained
         $Counts{'ArchiveExtract'}{'Empty member'}{''}++;

      } elsif ($p1 =~ s/^unsupported compr\. method: //) {
         #TD do_unzip: p003, unsupported compr. method: 99
         $Counts{'ArchiveExtract'}{'Unsupported compression'}{$p1}++;
      }
      else {
         $Counts{'ArchiveExtract'}{'*unknown'}{$p1}++;
      }
   }

   # do_cabextract: archive extraction
   elsif ($p1 =~ s/^do_cabextract: //) {
      #TD do_cabextract: can't parse toc line:  File size | Date       Time     | Name
      #TD do_cabextract: can't parse toc line: All done, no errors.
      $Totals{'ArchiveExtract'}++;
      if ( $p1 =~ /^([^:]+):\s*(.*)/ ) {
         $Counts{'ArchiveExtract'}{"\u$1"}{$2}++;
      } else {
         $Counts{'ArchiveExtract'}{$p1}{''}++;
      }
   }

   elsif ( $p1 =~ /^(?:\(!\) *)?SA TIMED OUT,/ ) {
      $Totals{'SATimeout'}++;
   }

   # I don't know how many variants of time outs there are... I suppose we'll fix as we go
   elsif (($p1 =~ /^\(!+\)([^ ]*) is taking longer than \d+ s and will be killed/) or 
          ($p1 =~ /^\(!+\)(.*) av-scanner FAILED: timed out/) or
          ($p1 =~ /^(?:\(!+\))?(.*): timed out/))
   {
      #TD (!)/usr/local/bin/uvscan is taking longer than 10 s and will be killed
      #TD (!!)NAI McAfee AntiVirus (uvscan) av-scanner FAILED: timed out
      #TD ClamAV-clamd: timed out, retrying (1)
      #TD (!)Sophie: timed out, retrying (2)

      $Totals{'AVTimeout'}++;
      $Counts{'AVTimeout'}{$1}++;
   }
   elsif (($p2) = ($p1 =~ /SMTP shutdown: (.*)$/)) {                      # log level -1
      #TD SMTP shutdown: Error writing a SMTP response to the socket: Broken pipe at (eval 49) line 836, <GEN232> line 51.
      #TD SMTP shutdown: tempdir is to be PRESERVED: /var/amavis/tmp/amavis-20070704T095350-13145
      strip_trace($p2);
      if ($p2 =~ /^tempdir is to be PRESERVED: (.*)\/([^\/]+)$/) {
         $Totals{'TmpPreserved'}++;
         $Counts{'TmpPreserved'}{$1}{$2}++;
         $p2 = "Preserved tempdir in $1";
      }
      $Totals{'WarningSmtpShutdown'}++;
      $Counts{'WarningSmtpShutdown'}{ucfirst($p2)}++;
   }

   elsif (($p1 =~ /PRESERVING EVIDENCE in (.*)\/([^\/]+)$/) or
         ($p1 =~ /tempdir is to be PRESERVED: (.*)\/([^\/]+)$/)) {
      #TD (!)TempDir removal: tempdir is to be PRESERVED: /var/amavis/tmp/amavis-20080110T173606-05767
      # log level -1
      #TD PRESERVING EVIDENCE in /var/amavis/tmp/amavis-20070704T111558-14883
      $Totals{'TmpPreserved'}++;
      $Counts{'TmpPreserved'}{$1}{$2}++;
   }

   # catchall for most warnings
   elsif (($p1 =~ /^\(!+\)/) or
          ($p1 =~ /^TROUBLE/) or
          ($p1 =~ /Can't (?:connect to UNIX|send to) socket/) or
          ($p1 =~ /.*: Empty result from /) or
          ($p1 =~ /open\(.*\): Permission denied/) or
          ($p1 =~ /^WARN: / ) or
          ($p1 =~ /Can't send SIG \d+ to process \[\d+\]: Operation not permitted/) or
          ($p1 =~ /(policy protocol: INVALID(?: AM\.PDP)? ATTRIBUTE LINE: .*)$/))
   {
      # Warning-class lines not claimed by an earlier arm.  The text is normalized so that
      # variants of one message collapse into a single key, then counted as a security
      # warning, an AV connection failure, or a generic warning.

      #TD (!)loading policy bank "AM.PDP-SOCK": unknown field "0"
      #TD (!!)policy_server FAILED: SQL quarantine code not enabled at (eval 37) line 306, <GEN6> line 4.
      #TD (!!)policy_server FAILED: Can't open file /var/spool/amavis/quarantine/spam-CFJYXmeS+FLy: Permission denied at (eval 37) line 330, <GEN28> line 5.
      #TD ClamAV-clamd: Empty result from /var/run/clamav/clamd, retrying (1)
      #TD dccproc[17422]: open(/var/dcc/map): Permission denied
      #TD TROUBLE in check_mail:  FAILED: Died at /usr/sbin/amavisd-maia line 2872, <GEN4> line 22.
      #TD TROUBLE in check_mail: spam_scan FAILED: DBD::mysql::st execute failed: MySQL server has gone away at /usr/sbin/amavisd-maia line 3786, <GEN4> line 3036.
      #TD TROUBLE in process_request: DBD::mysql::st execute failed: MySQL server has gone away at (eval 35) line 258, <GEN18> line 3.
      #TD TROUBLE in process_request: DBD::mysql::st execute failed: Lost connection to MySQL server during query at (eval 35) line 258, <GEN3> line 3.
      #TD TROUBLE in process_request: Can't call method "disconnect" on an undefined value at /usr/sbin/amavisd-maia line 2895, <GEN4> line 22.
      #TD TROUBLE: recipient not done: <to@example.com> smtp response ...
      #TD (!!)TROUBLE in process_request: Can't create file /var/amavis/tmp/amavis-98/email.txt: File exists at /usr/local/sbin/amavisd line 4774, <GEN12> line 4.
      #TD TROUBLE: lookup table is an unknown object: object ...
      #TD (!) policy protocol: INVALID ATTRIBUTE LINE: /var/spool/courier/tmp/114528/D967099\n 
      #TD (!) policy protocol: INVALID AM.PDP ATTRIBUTE LINE: /var/spool/courier/tmp/114528/D967099\n 

      # Strip the "(!)" / "(!!)" marker and any whitespace that follows it.  This must be
      # \s* (whitespace): a bare s* would leave the blank in "(!) policy protocol ..." and
      # would swallow a leading letter "s" from the message text itself.
      $p1 =~ s/^\(!+\)\s*//;

      if ($p1 =~ /^WARN: (Using cpio instead of pax .*)$/) {
         #TD (!)WARN: Using cpio instead of pax can be a security risk; please add:  $pax='pax';  to amavisd.conf and check that the pax(1) utility is available on the system!
         $Totals{'WarningSecurity'}++;
         $Counts{'WarningSecurity'}{$1}++;
         next;
      }

      # Remove the retry counter and trace suffix so repeated attempts share one key.
      $p1 =~ s/, retrying\s+\(\d+\)$//;
      strip_trace($p1);

      # canonicalize variations of the same message
      $p1 =~ s/^run_av \(([^,]+), built-in i\/f\)/$1/;
      $p1 =~ s/ av-scanner FAILED: CODE\(0x[^)]+\)/:/;
      $p1 =~ s/^(.+: Too many retries to talk to \S+) .*/$1/;

      # Scanner socket problems are reported separately, keyed by scanner and failure text.
      if (($p1 =~ /(\S+): Can't (?:connect|send) to (?:UNIX )?(.*)$/) or
          ($p1 =~ /(\S+): (Too many retries to talk to .*)$/))
      {

         #TD (!)ClamAV-clamd: Can't connect to UNIX socket /var/run/clamav/clamd.socket: No such file or directory, retrying (2)
         #TD (!)ClamAV-clamd: Can't connect to UNIX socket /var/run/clamav/clamd: Connection refused, retrying (2)
         #TD ClamAV-clamd: Can't connect to UNIX socket /var/run/clamav/clamd: Connection refused, retrying (1)
         #TD ClamAV-clamd: Can't send to socket /var/run/clamav/clamd: Transport endpoint is not connected, retrying (1)
         #TD Sophie: Can't send to socket /var/run/sophie: Transport endpoint is not connected, retrying (1)
         #TD (!)run_av (Sophie, built-in i/f): Too many retries to talk to /var/run/sophie (timed out) at (eval 55) line 310, <GEN16> line 16.
         #TD (!)run_av (ClamAV-clamd, built-in i/f): Too many retries to talk to /var/run/clamav/clamd.socket (Can't connect to UNIX socket /var/run/clamav/clamd.socket: No such file or directory) at (eval 52) line 310.
         #TD (!!)ClamAV-clamd av-scanner FAILED: CODE(0x804fa08) Too many retries to talk to /var/run/clamav/clamd.socket (Can't connect to UNIX socket /var/run/clamav/clamd.socket: No such file or directory) at (eval 52) line 310. at (eval 52) line 511.
         #TD (!!)Sophie av-scanner FAILED: CODE(0x814fd24) Too many retries to talk to /var/run/sophie (timed out) at (eval 55) line 310, <GEN16> line 16. at (eval 55) line 511, <GEN16> line 16.

         $Totals{'AVConnectFailure'}++;
         $Counts{'AVConnectFailure'}{$1}{ucfirst($2)}++;
         next;
      }

      # simplify or canonicalize variations of the same message
      $p1 =~ s/^TROUBLE(:| in) //;
      $p1 =~ s/Can't create file \S+: (.+)$/Can't create file: $1/;
      $p1 =~ s/Can't send SIG \d+ to process \[\d+\]/Can't send SIG to process/;

      $Totals{'Warning'}++;
      $Counts{'Warning'}{$p1}++;
   }

   # Begin forced warnings: Keep this code below warning catchall
   elsif ($p1 =~ /^lookup_sql: /) {
      #TD lookup_sql: 2006, MySQL server has gone away
      $Totals{'WarningSQL'}++;
      $Counts{'WarningSQL'}{'SQL died'}++;

   } elsif (($reason,$item) = ($p1 =~ /^connect_to_sql: ([^']+) '\S+': (.*?)(?: \(\d+\))?$/) or
            ($item,$reason) = ($p1 =~ /^lookup_sql_field\((.*)\) \(WARN: (no such field in the SQL table)\)/)) {
      #TD connect_to_sql: unable to connect to DSN 'DBI:mysql:maia:sqlhost1.example.com': Lost connection to MySQL server during query
      #TD connect_to_sql: unable to connect to DSN 'DBI:mysql:maia:sqlhost2.example.com': Can't connect to MySQL server on 'sqlhost2.example.com' (111)
      #TD lookup_sql_field(id) (WARN: no such field in the SQL table), "from@example.com" result=undef
      $Totals{'WarningSQL'}++;
      $Counts{'WarningSQL'}{ucfirst("$reason: $item")}++;
   }
   # End forced warnings

   # Panic
   elsif ( ($p2) = ($p1 =~ /^(?:\(!\)\s*)?PANIC, (.*)$/ )) {
      #TD PANIC, PANIC, SA produced a clone process of [19122], TERMINATING CLONE [19123]

      $Totals{'Panic'}++;
      $Counts{'Panic'}{$p2}++;

   }

   # Fatal
   elsif ( $p1 =~ /^Requesting process rundown after fatal error$/ ) {
      #TD Requesting process rundown after fatal error
      $Totals{'Fatal'}++;
      $Counts{'Fatal'}{$p1}++;

   # DCC
   } elsif (($reason) = ($p1 =~ /^(missing message body; fatal error)/) or
            ($reason) = ($p1 =~ /^(try to start dccifd)/)) {
      $Totals{'DccError'}++;
      $Counts{'DccError'}{ucfirst($reason)}++;
   }
   elsif ($p1 =~ /^continue not asking DCC \d+ seconds after failure/) {
      $Totals{'DccError'}++;
      $Counts{'DccError'}{'Continue not asking DCC after failure'}++;
   }
   elsif ($p1 =~ /^no DCC answer from (\S+) after \d+ ms$/) {
      #TD dccproc[111]: no DCC answer from 10.0.0.1,6789 after 6053 ms
      $Totals{'DccError'}++;
      $Counts{'DccError'}{"No answer from $1"}++;
   }

   elsif ( ($reason, $from, $to) = ($p1 =~ /^skip local delivery\((\d+)\): <(.*?)> -> <(.*?)>$/ )) {
      # An empty sender is shown as '<>'.
      if ($from eq '') {
         $from = '<>';
      }

      # Replace the numeric code from the log line with a readable label.
      if ($reason == 1) {
         $reason = "No localpart";
      }
      elsif ($reason == 2) {
         $reason = "Local alias is null";
      }
      else {
         $reason = "Other";
      }

      $Counts{'LocalDeliverySkipped'}{$reason}{$from}{$to}++;
      $Totals{'LocalDeliverySkipped'}++;
   }

   # soft white/black listing
   elsif ($p1 =~ /^wbl: (.*)$/) {
      $p1 = $1;

      # These wbl messages are skipped without being counted or reported.
      next if ($p1 =~ /^(?:white|black)listed (?:by|sender)/
            or $p1 =~ /^black or whitelisted/
            or $p1 =~ /^checking sender/
            or $p1 =~ /^\(SQL\) recip .* matches$/);

      #TD wbl: soft-whitelisted (-3) sender <from@example.com> => <to@sample.net>, recip_key="."
      if (my ($color, $sender) = ($p1 =~ /^(?:\(SQL\) )?soft-(white|black)listed \([^)]+\) sender <([^>]*)>/)) {
         # Counted under "Whitelisted" or "Blacklisted", per sender.
         my $bucket = ucfirst($color) . 'listed';
         $Totals{$bucket}++;
         $Counts{$bucket}{$sender}++;
      }
      else {
         inc_unmatched('wbl', $OrigLine);
         next;
      }
   }

   # XXX: WHITELISTED or BLACKLISTED should be caught in SPAM tag above
   elsif (($p1 =~ /^white_black_list: whitelisted sender/ ) or
          ($p1 =~ /.* WHITELISTED/) ) {
      $Totals{'Whitelisted'}++;

   } elsif (($p1 =~ /^white_black_list: blacklisted sender/ ) or
	        ( $p1 =~ /.* BLACKLISTED/) ) {
      $Totals{'Blacklisted'}++;

   # The virus_scan line does not correctly report multiple virus names when detected by more than
   # one scanner.  Use the ask_av and run_av lines below
   #
   #} elsif ( my ($malware, $scanners) = ($p1 =~ /virus_scan: \(([^)]+)\), detected by \d+ scanners: (.*)$/ )) {
      #TD virus_scan: (HTML.Phishing.Bank-43), detected by 1 scanners: ClamAV-clamd
      #TD virus_scan: (Worm.SomeFool.D, Worm.SomeFool.D), detected by 1 scanners: ClamAV-clamd
      #TD virus_scan: (Trojan.Downloader.Small-9993), detected by 2 scanners: ClamAV-clamd, NAI McAfee AntiVirus (uvscan)
   #   foreach (split /, /, $scanners) {
   #      #$Totals{'MalwareByScanner'}++;       # No summary output: redundant w/Malware{Passed,Blocked}
   #      $Counts{'MalwareByScanner'}{"$_"}{$malware}++;
   #   }

   } elsif ($p1 =~ /^(?:ask_av|run_av) (.*)$/) {
      if ( ($scanner, $malware) = ($1 =~ /^\((.+)\):(?: [^:]+)? INFECTED: ([^,]+)/ )) {
         #TD ask_av (ClamAV-clamd): /var/spool/amavis/tmp/amavis-20070830T070403-13776/parts INFECTED: Email.Malware.Sanesecurity.07082700
         #TD run_av (NAI McAfee AntiVirus (uvscan)): INFECTED: W32/Zhelatin.gen!eml, W32/Zhelatin.gen!eml
         $Counts{'MalwareByScanner'}{$scanner}{$malware}++;
      }
      # currently ignoring other ask_av or run_av lines
   }

   # Extra Modules loaded at runtime
   elsif (($item) = ( $p1 =~ /^extra modules loaded(?: after daemonizing)?: (.+)$/ )) {
      #TD extra modules loaded: PerlIO.pm, PerlIO/scalar.pm
      foreach my $code (split /, /, $item) {
         #TD extra modules loaded: unicore/lib/gc_sc/Digit.pl, unicore/lib/gc_sc/SpacePer.pl
         # avoid useless reporting of pseudo-modules which can't be pre-loaded once
         unless ($code =~ m#^unicore/lib/#) {
            $Totals{'ExtraModules'}++;
            $Counts{'ExtraModules'}{$code}++;
         }
      }

   # Timing report
   } elsif (my ($total,$report) = ( $p1 =~ /^TIMING \[total (\d+) ms\] - (.+)$/ )) {

      #TD TIMING [total 5808 ms] - SMTP greeting: 5 (0%)0, SMTP LHLO: 1 (0%)0, SMTP pre-MAIL: 2 (0%)0, SMTP pre-DATA-flush: 5 (0%)0, SMTP DATA: 34 (1%)1, check_init: 1 (0%)1
      # older format, maia mailguard
      #TD TIMING [total 3795 ms] - SMTP EHLO: 1 (0%), SMTP pre-MAIL: 0 (0%), maia_read_system_config: 1 (0%), maia_get_mysql_size_limit: 0 (0%), SA check: 3556 (94%), rundown: 0 (0%)

      # Timing line is incomplete - let's report it
      if ($p1 !~ /\d+ \(\d+%\)\d+$/ and $p1 !~ /\d+ \(\d+%\)$/) {
         inc_unmatched('timing', $OrigLine);
         next;
      }

      if ($Opts{'timings'}) {
         my @pairs = split(/[,:] /, $report);
         while (my ($key,$value) = @pairs) {
            #4 (0%)0
            my ($ms) = ($value =~ /^(\d+) /);
            # maintain a per-SA test list of timings
            push @{$Timings{$key}}, $ms;
            shift @pairs; shift @pairs;
         }
         push @TimingsTotals, $total;
      }

   # Decoders
   } elsif (my ($suffix, $info) = ( $p1 =~ /^Internal decoder for (\.\S*)\s*(?:\(([^)]*)\))?$/ )) {
      #TD Internal decoder for .gz   (backup, not used)
      #TD Internal decoder for .zip 
      $StartInfo{'Decoders'}{'Internal'}{$suffix} = $info;

   } elsif (($suffix, $decoder) = ( $p1 =~ /^No decoder for\s+(\.\S*)\s*(?:tried:\s+(.*))?$/ )) {
      #TD No decoder for       .tnef tried: tnef
      # older
      #TD No decoder for       .doc
      $StartInfo{'Decoders'}{'None'}{$suffix} = "tried: " . ($decoder ? $decoder : "unknown");

   } elsif (($suffix, $decoder) = ( $p1 =~ /^Found decoder for\s+(\.\S*)\s+at\s+(.*)$/ )) {
      $StartInfo{'Decoders'}{'External'}{$suffix} = $decoder;

   # AV Scanners
   } elsif (my ($tier, $scanner, $location) = ( $p1 =~ /^Found (primary|secondary) av scanner (.+) at (.+)$/ )) {
      #TD Found primary av scanner NAI McAfee AntiVirus (uvscan) at /usr/local/bin/uvscan
      #TD Found secondary av scanner ClamAV-clamscan at /usr/local/bin/clamscan

      $StartInfo{'AVScanner'}{"\u$tier"}{$scanner} = $location;

   } elsif ( (($tier, $scanner) = ( $p1 =~ /^Using internal av scanner code for \(([^)]+)\) (.+)$/ )) or
             (($tier, $scanner) = ( $p1 =~ /^Using (.*) internal av scanner code for (.+)$/ ))) {
      #TD Using internal av scanner code for (primary) ClamAV-clamd
      #TD Using primary internal av scanner code for ClamAV-clamd

      $StartInfo{'AVScanner'}{"\u$tier internal"}{$scanner} = "";

   # (Un)Loaded code, protocols, etc.
   } elsif (my ($code, $loaded) = ( $p1 =~ /^(\S+)\s+(?:proto? |base |protocol )?\s*(?:code)?\s+((?:NOT )?loaded)$/ )) {
      $StartInfo{'Code'}{"\u\L$loaded"}{$code} = "";

   } elsif (my ($module, $vers,) = ( $p1 =~ /^Module (\S+)\s+(.+)$/ )) {
      #TD Module Amavis::Conf        2.086
      $StartInfo{'Code'}{'Loaded'}{$module} = $vers;

   } elsif (($code, $location) = ( $p1 =~ /^Found \$(\S+)\s+at\s+(.+)$/ )) {
      #TD Found $file            at /usr/bin/file
      #TD Found $uncompress at /usr/bin/gzip -d
      $StartInfo{'Code'}{'Loaded'}{$code} = $location;

   } elsif (($code, $location) = ( $p1 =~ /^No \$(\S+),\s+not using it/ )) {
      #TD No $dspam,             not using it
      $StartInfo{'Code'}{'Not loaded'}{$code} = $location;

   } elsif ( $p1 =~ /^starting\.\s+(.+) at \S+ (?:amavisd-new-|Maia Mailguard )([^,]+),/ ) {
      #TD starting.  /usr/local/sbin/amavisd at mailhost.example.com amavisd-new-2.5.0 (20070423), Unicode aware, LANG="C"
      #TD starting.  /usr/sbin/amavisd-maia at vwsw02.eon.no Maia Mailguard 1.0.2, Unicode aware, LANG=en_US.UTF-8
      %StartInfo = ('ampath' => $1, 'amversion' => $2);            # track only most recent startup

   } elsif ( $p1 =~ /^config files read: (.*)$/ ) {
      #TD config files read: /etc/amavisd.conf, /etc/amavisd-overrides.conf
      $StartInfo{'Configs'} = "$1";

   } elsif ( $p1 =~ /^Creating db in ([^;]+); [^,]+, (.*)$/ ) {
      #TD Creating db in /var/spool/amavis/db/; BerkeleyDB 0.31, libdb 4.4
      $StartInfo{'db'} = "$1\t($2)";
 
   } elsif (my ($log) = ($p1 =~ /^logging initialized, log (level \d+, syslog: \S+)/ )) {
      $StartInfo{'Logging'} = $log;

   } elsif (( $p1 =~ /^user=([^,]*), EUID: (\d+) [(](\d+)[)];\s+group=([^,]*), EGID: ([\d ]+)[(]([\d ]+)[)]/ )) {
      # uninteresting...
      #$StartInfo{'IDs'}{'user'} = $1;
      #$StartInfo{'IDs'}{'euid'} = $2;
      #$StartInfo{'IDs'}{'uid'} = $3;
      #$StartInfo{'IDs'}{'group'} = $4;
      #$StartInfo{'IDs'}{'egid'} = $5;
      #$StartInfo{'IDs'}{'gid'} = $6;

   } elsif (($p2) = ( $p1 =~ /^Net::Server: (.*)$/ )) {
      if ($p2 =~ /^.*starting! pid\((\d+)\)/) {
         #TD Net::Server: 2007/05/02-11:05:24 Amavis (type Net::Server::PreForkSimple) starting! pid(4405)
         $StartInfo{'Server'}{'pid'} = $1;
      } elsif ($p2 =~ /^Binding to UNIX socket file (.*) using/ ) {
         #TD Net::Server: Binding to UNIX socket file /var/spool/amavis/amavisd.sock using SOCK_STREAM
         $StartInfo{'Server'}{'socket'} = $1;
      } elsif ($p2 =~ /^Binding to TCP port (\d+) on host (.*)$/ ) {
         #TD Net::Server: Binding to TCP port 10024 on host 127.0.0.1
         $StartInfo{'Server'}{'ip'} = "$2:$1";
      } elsif ($p2 =~ /^Setting ([ug]id) to "([^"]+)"$/ ) {
         $StartInfo{'Server'}{$1} = $2;
         #TD Net::Server: Setting gid to "91 91"
         #TD Net::Server: Setting uid to "91"
      }
      # skip others

   } else {
      # Report any unmatched entries...
      inc_unmatched('final', $OrigLine);
   }
}

########################################
# Final tabulations, and report printing

# Map the requested detail onto tree depth: detail 5 shows level 1,
# detail 6 shows level 2, and so on.
my $max_level_global = $Opts{'detail'} - 4;

# Roll the individual disposition counters up into their category totals.
# TotalMsgs is listed last, since it is the sum of the category totals
# computed before it.
foreach my $rollup (
      [ 'TotalHams',      qw(CleanPassed CleanBlocked BadHeaderPassed BadHeaderBlocked) ],
      [ 'TotalSpams',     qw(SpamPassed SpamBlocked SpamDiscarded) ],
      [ 'TotalMalware',   qw(MalwarePassed MalwareBlocked) ],
      [ 'TotalBanned',    qw(BannedNamePassed BannedNameBlocked) ],
      [ 'TotalUnchecked', qw(UncheckedPassed UncheckedBlocked) ],
      [ 'TotalOther',     qw(MTABlocked OversizedBlocked TempfailPassed TempfailBlocked OtherBlocked) ],
      [ 'TotalMsgs',      qw(TotalHams TotalSpams TotalMalware TotalBanned TotalUnchecked TotalOther) ],
   ) {
   my ($total_key, @counters) = @$rollup;
   my $sum = 0;
   $sum += $Totals{$_}  foreach (@counters);
   $Totals{$total_key} = $sum;
}


# Print the summary report, unless suppressed, when at least one total is
# non-zero.  The values must be examined explicitly, because %Totals always
# contains some keys, even when nothing was counted.
#
if (!exists $Opts{'nosummary'}) {
   my $have_totals = 0;
   foreach my $value (values %Totals) {
      if ($value) {
         $have_totals = 1;
         last;
      }
   }
   printSummaryReport (@Sections)   if ($have_totals);
}

# Print the detailed reports once the detail level is high enough; the
# startup information additionally requires detail 10 or more.
#
if ($Opts{'detail'} >= 5) {
   printDetailReport (@Sections);
   printSpamScorePercentilesReport();
   printSpamScoreFrequencyReport();
   printSARulesReport();
   printTimingsReport();
   if ($Opts{'detail'} >= 10) {
      printStartupInfoReport();
   }
}


# Print the log lines that no pattern matched, most frequent first
#
if (keys %UnmatchedList) {
   my @by_frequency = sort { $UnmatchedList{$b} <=> $UnmatchedList{$a} } keys %UnmatchedList;

   print "\n\n**Unmatched Entries**\n";
   foreach my $unmatched (@by_frequency) {
      printf "%8d   %s\n",  $UnmatchedList{$unmatched}, $unmatched;
   }
}


##################################################

# Returns the given number with a comma inserted between each group of
# three integer digits (eg. 1234567 becomes 1,234,567).  Digits following
# a decimal point are left ungrouped.
#
sub commify ($) {
    my ($number) = @_;

    # Work on the reversed string, so that digit groups are counted from
    # the right; the second lookahead skips digits that precede a decimal
    # point in the reversed text, ie. the fractional digits.
    my $reversed = scalar reverse $number;
    $reversed =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    $number = scalar reverse $reversed;
    return $number;
}

# Scales a number to the largest binary unit (K, M, G or T) that it
# reaches, and completes the supplied printf format prefix to match.
#
# Arg1 is the number to scale
# Arg2 is a printf format prefix (eg. '%8'), to which a conversion is appended
#
# Returns the list (number, format).  A number of at least 1024 is divided
# by the unit size, and the format gains '.3f' followed by the unit letter.
# Smaller numbers are returned unchanged, and the format gains 'd '.
#
sub unitize($ $) {
   my ($num, $fmt) = @_;

   # unit sizes, largest first, so that the first unit reached wins
   my @units = (
      [ 'T', 1099511627776 ],
      [ 'G', 1073741824 ],
      [ 'M', 1048576 ],
      [ 'K', 1024 ],
   );

   foreach my $unit (@units) {
      my ($letter, $size) = @$unit;
      if ($num >= $size) {
         return ($num / $size, $fmt . '.3f' . $letter);
      }
   }

   return ($num, $fmt . 'd ');
}

# Prints the summary section of the report.
#
# Arg1 is the report layout array (received as a reference, by way of the
# prototype).  Each entry is a row of the form [ keyname, numfmt, desc, divisor ]:
#    keyname  '__SECTION' starts a new section; the two-character string '\n'
#             requests a blank line; any single character requests a separator
#             line drawn with that character; anything else is a key of %Totals
#    numfmt   printf conversion for the total, optionally followed by a
#             column digit; a leading 'Z' requests a unitized total
#    desc     descriptive text for the row
#    divisor  optional scalar reference; when present, the row's percentage
#             of the referenced value is printed
#
# Only rows having a total greater than zero are printed.  Blank lines and
# separators are printed only once a row of the current section was printed.
#
sub printSummaryReport (\@) {
   my ($layout) = @_;
   my $rows_printed = 0;            # rows printed so far, over all sections
   my $section_rows = 0;            # rows printed so far, in the current section

   my $banner = "****** Summary ";
   print $banner, '*' x ($Opts{'max_report_width'} - length $banner), "\n\n";

   ROW: foreach my $row ( @$layout ) {
      my ($keyname, $numfmt, $desc, $divisor) = @{$row}[0 .. 3];

      # a new section resets the blank line/separator control
      if ($keyname eq '__SECTION') {
         $section_rows = 0;
         next ROW;
      }

      my $spacing_ok = ($rows_printed && $section_rows);

      # blank line request
      if ($keyname eq '\n') {
         print "\n"  if ($spacing_ok);
         next ROW;
      }

      # separator line request
      if (length ($keyname) == 1) {
         printf "%s   %s\n", $keyname x 8, $keyname x 58  if ($spacing_ok);
         next ROW;
      }

      # data rows having nothing to report are skipped
      next ROW  unless ($Totals{$keyname} > 0);

      my $total = $Totals{$keyname};
      my $fmt   = '%8';
      my $extra = ' %25s';
      my $col   = undef;

      # peel the trailing column digit, if any, off of the number format
      if ($numfmt =~ s/(\d)$//) {
         $col = $1;
      }

      if ($numfmt =~ /^Z/) {
         # Z format: the total is unitized, and the exact count shown alongside
         ($total, $fmt) = unitize ($total, $fmt);
      }
      else {
         $fmt  .= "$numfmt ";
         $extra = '';
      }

      if (! $divisor) {
         printf "$fmt  %-31s $extra\n", $total, $desc, commify ($Totals{$keyname});
      }
      elsif ($$divisor == $Totals{$keyname}) {
         printf "$fmt  %-50s 100.00%%\n", $total, $desc;
      }
      else {
         my $percent = $Totals{$keyname} * 100 / $$divisor;
         if ($col == 1) {
            #       3        Clean passed                          12.00%
            printf "$fmt       %34s   %6.2f%%\n", $total, $desc, $percent;
         }
         else {
            #       8   Ham -----------------------------------   -------   32.00%
            printf "$fmt  %s %s   -------  %6.2f%%\n", $total, $desc, '-' x (38 - length($desc)), $percent;
         }
      }

      $rows_printed++;
      $section_rows++;
   }
   print "\n";
}

# Prints the detail section of the report: for each layout row having an
# entry in %Counts, a heading line showing the row's total, followed by
# the tree of its tallies.  Nothing is printed when %Counts is empty.
#
# Arg1 is the report layout array (received as a reference, by way of the
# prototype); only the key name (field 0) and description (field 2) of
# each row are used.
#
# An option named after the lowercased key name may limit a row's output:
#    N     show at most N levels
#    N.M   show at most N levels, and only the top M first-level entries
#    .M    show only the top M first-level entries
#
sub printDetailReport (\@) {
   my ($formats) = @_; 
   my $header_printed = 0;

   return unless (keys %Counts);

   for ( @$formats ) {
      my ($keyname, $desc) = ($_->[0], $_->[2]);

      next if (! exists $Counts{$keyname});

      my $max_level = 11;                    # default: unlimited to buildTree
      my $max_level_unspecified = 0;         # default: assume no level limiter specified
      my $topn = -1;                         # default: don't limit to top n level 1 items

      if (exists $Opts{"\L$keyname"}) {
         $max_level = $Opts{"\L$keyname"};
         if ($max_level =~ /^(\d*)\.(\d+)$/) {
            if ($1) {
               $max_level = $1;
            }
            else {
               $max_level = 11;              # top n specified, but no max level
               $max_level_unspecified = 1;
            }
            $topn = $2;
         }
      }
      else {
         $max_level_unspecified = 1;
      }
      #print "max_level: $max_level, max_level_unspecified: $max_level_unspecified, topn: $topn\n";

      my ($count, $listref) = buildTree (%{$Counts{$keyname}}, $max_level, 0);

      if ($count > 0) {
         $desc =~ s/^\s+//; 
         if ($max_level_unspecified or $max_level > 0) {
            if (! $header_printed) {
               my $header = "****** Detail ";
               print $header, '*' x ($Opts{'max_report_width'} - length $header), "\n";
               $header_printed = 1;
            }
            # The description is passed as an argument, rather than being
            # interpolated into the format, so that a '%' within it is
            # printed literally instead of being taken as a conversion.
            printf "\n%8d   %s %s\n", $count, $desc, '-' x ($Opts{'max_report_width'} - 12 - length($desc));
         }

         printTree ($listref, $topn);
      }
   }
   print "\n";
}


=pod
 Spam score percentiles report

   ==================================================================================
   Spam Score Percentiles        0%       50%       90%       95%       98%      100%
   ----------------------------------------------------------------------------------
   Score                     -6.000     2.000    20.250    22.875    24.450    25.500
   ==================================================================================
=cut
# Prints the spam score percentiles table, provided the score_percentiles
# option is defined and at least one spam score was collected.
#
sub printSpamScorePercentilesReport {
   return  unless (defined $Opts{'score_percentiles'} and @SpamScores);

   #print "Scores: @SpamScores\n";
   my @ascending = sort { $a <=> $b } @SpamScores;
   my @percents  = split /[\s,]+/, $Opts{'score_percentiles'};
   my @scores    = getpercentiles (@ascending, @percents);

   # one label column, plus one column per requested percentile
   my $ncols      = scalar @percents;
   my $outer_rule = ($sep1 x $fw1) . ($sep1 x $fw2 x $ncols);
   my $inner_rule = ($sep2 x $fw1) . ($sep2 x $fw2 x $ncols);

   # heading columns are one narrower, leaving room for the '%' sign
   my $pcnt_width = $fw2 - 1;

   print  "\n", $outer_rule;
   printf "\n%-${fw1}s" . "%${pcnt_width}s%%" x $ncols,          "Spam Score Percentiles", @percents;
   print  "\n", $inner_rule;
   printf "\n%-${fw1}s" . "%${fw2}.3f" x scalar (@scores),       "Score", @scores;
   print  "\n", $outer_rule, "\n";
}

=pod
 Spam score frequency report

 ======================================================================================================
 Spam Score Frequency      <= -10     <= -5      <= 0      <= 5     <= 10     <= 20     <= 30      > 30
 ------------------------------------------------------------------------------------------------------
 Hits                           3        88       393        69        17         7         1         3
 Percent of Hits            0.52%    15.15%    67.64%    11.88%     2.93%     1.20%     0.17%     0.52%
 ======================================================================================================
=cut
# Prints the spam score frequency table, provided the score_frequencies
# option is defined and at least one spam score was collected.
#
sub printSpamScoreFrequencyReport {
   return  unless (defined $Opts{'score_frequencies'} and @SpamScores);

   my @ascending = sort { $a <=> $b } @SpamScores;
   my @buckets   = sort { $a <=> $b } split /[\s,]+/, $Opts{'score_frequencies'};

   # an extra, final bucket collects every score above the largest limit
   push @buckets, $buckets[-1] + 1;
   #print "Scores: @ascending\n";

   my @hits = getfrequency (\@ascending, @buckets);

   # column headings: "<= limit" for each bucket, but "> limit" for the last
   my @headings = map { sprintf "%${fw2}s", " <= $_" } @buckets[0 .. $#buckets - 1];
   push @headings, sprintf ("%${fw2}s", " > $buckets[-2]");

   my $ncols      = scalar @buckets;
   my $outer_rule = ($sep1 x $fw1) . ($sep1 x $fw2 x $ncols);
   my $inner_rule = ($sep2 x $fw1) . ($sep2 x $fw2 x $ncols);

   # percentage columns are one narrower, leaving room for the '%' sign
   my $pcnt_width = $fw2 - 1;

   print  "\n", $outer_rule;
   printf "\n%-${fw1}s" . "%-${fw2}s" x $ncols,                      "Spam Score Frequency", @headings;
   print  "\n", $inner_rule;
   printf "\n%-${fw1}s" . "%${fw2}d" x scalar (@hits),               "Hits", @hits;
   printf "\n%-${fw1}s" . "%${pcnt_width}.2f%%" x scalar (@hits),    "Percent of Hits", map {($_ / scalar (@SpamScores)) * 100.0; } @hits;
   print  "\n", $outer_rule, "\n";
}

# Prints the SpamAssassin rule hit tables, for spam and then for ham,
# provided the sarules option is defined and rule hits were collected.
#
# The sarules option holds two limits, "S,H": the number of spam rules and
# of ham rules to show.  A missing limit is taken from the default setting.
#
sub printSARulesReport {
   return  unless (defined $Opts{'sarules'} and keys %{$Counts{'SArules'}});

   # length of the longest report line; sets the length of the rule lines
   our $maxlen = 0;

   # Returns the list of report lines for the rules hit by messages of the
   # given type ('Spam' or 'Ham'), most frequently hit rule first.
   #
   # Arg1 is the message type
   # Arg2 is the number of rules to show, or the string "all"; the string
   #      '0' disables the report, yielding an empty list
   #
   sub getSAHitsReport($ $) {
      my ($type, $topn) = @_;

      return  if ($topn eq '0');     # topn can be numeric, or the string "all"

      my @lines = ();
      my $rank  = 1;
      my @rules = sort { $Counts{'SArules'}{$type}{$b} <=> $Counts{'SArules'}{$type}{$a} } keys %{$Counts{'SArules'}{$type}};

      foreach my $rule (@rules) {

         # beyond the top n rules, an ellipsis ends the list
         if ($topn ne 'all' and $rank > $topn) {
            push @lines, "...\n";
            last;
         }

         my $hits  = $Counts{'SArules'}{$type}{$rule};
         my $nham  = $Counts{'SArules'}{'Ham'}{$rule};
         my $nspam = $Counts{'SArules'}{'Spam'}{$rule};

         # percentages are reported as 0 when their base total is 0
         my $pcnt_msgs = $Totals{'TotalMsgs'}  == 0 ? 0 : 100.0 * $hits  / $Totals{'TotalMsgs'};
         my $pcnt_spam = $Totals{'TotalSpams'} == 0 ? 0 : 100.0 * $nspam / $Totals{'TotalSpams'};
         my $pcnt_ham  = $Totals{'TotalHams'}  == 0 ? 0 : 100.0 * $nham  / $Totals{'TotalHams'};

         # rank, count, % msgs, % spam, % ham
         push @lines, sprintf ("%4d %8d   %6.2f%%  %6.2f%%  %6.2f%%     %s\n",
            $rank, $hits, $pcnt_msgs, $pcnt_spam, $pcnt_ham, $rule);
         $rank++;

         my $len = length $lines[-1];
         $maxlen = $len  if ($len > $maxlen);
      }
      return @lines;
   }

   my ($def_limit_spam, $def_limit_ham) = split /[\s,]+/, $Defaults{'sarules'};
   my ($limit_spam, $limit_ham)         = split /[\s,]+/, $Opts{'sarules'};
   $limit_spam = $def_limit_spam    if $limit_spam eq '';
   $limit_ham  = $def_limit_ham     if $limit_ham  eq '';

   # both reports are built before printing, so that the rule lines of
   # each table span the longest line of either
   my @report_spam = getSAHitsReport('Spam', $limit_spam);
   my @report_ham  = getSAHitsReport('Ham',  $limit_ham);

   foreach my $table ([ 'Spam', \@report_spam ], [ 'Ham', \@report_ham ]) {
      my ($type, $lines) = @$table;
      next  unless (scalar @$lines);

      print "\n", "=" x $maxlen, "\n";
      print "SpamAssassin Rule Hits: $type\n";
      print "-" x $maxlen, "\n";
      print "Rank     Hits    % Msgs   % Spam    % Ham      Score Rule\n";
      print "----     ----    ------   ------    -----      ----- ----\n";
      print @$lines;

      # only the ham table is closed off by a final rule line
      print "\n", "=" x $maxlen, "\n"   if ($type eq 'Ham');
   }
}

=pod
 Amavis timings percentiles report

   ===================================================================================================================
   Timing Percentiles         % Time   Total (s)        0%       10%       25%       50%       75%       90%      100%
   -------------------------------------------------------------------------------------------------------------------
   SA check                   81.58%      73.006     7.558    13.347    22.030    36.503    50.975    59.659    65.448
   AV-scan-1                  18.42%      16.484     2.415     3.580     5.329     8.242    11.155    12.904    14.069
   ...
   ===================================================================================================================
   Total Time                100.00%      89.490     9.973    16.927    27.359    44.745    62.131    72.563    79.517 

=cut
# Prints the timings percentiles table, provided the timings option is
# non-zero and timing data was collected.
#
# One row is printed per timed test, largest total time first, until the
# rows printed account for the percentage of time given by the timings
# option (a value of 100 shows every row).  Two summary rows follow: the
# scan times as recorded in @TimingsTotals, and a hypothetical scan time
# made up of the per-test column sums.
#
# Times are divided by 1000 for printing, under the "Total (s)" heading.
#
sub printTimingsReport {
   # Timing report
   if ($Opts{'timings'} and %Timings) {
      my (@p, @sorted, %pertest_totals, @col_subtotals);
      my $pcnt;
      my $time_total_actual = 0;
      my $time_total_hypo   = 0;
      my $subtotal_pcnt     = 0;
      my @percents = split /[\s,]+/, $Opts{'timings_percentiles'};
      my $header_footer = '=' x 45 . '==========' x @percents;
      my $header_end    = '-' x 45 . '----------' x @percents;

      print "\n$header_footer\n";
      printf "%-25s  %6s %11s" ." %8s%%" x @percents , "Timing Percentiles", "% Time", "Total (s)", @percents;
      print "\n$header_end\n";

      # Sum the total time spent on all tests, and for each SA test, sum their timings
      # This would represent a worst/best case min to max (ie. percentile 0/percentile 100)
      # sampling of the actual message timings
      foreach my $satest (keys %Timings) {
         foreach my $timeval (@{$Timings{$satest}}) {
            $pertest_totals{$satest} += $timeval;
         }
         $time_total_hypo += $pertest_totals{$satest};
      }

      # Sum total time spent scanning
      $time_total_actual += $_   foreach (@TimingsTotals);

      # a setting of 100 must show every row, even when the running
      # percentage reaches 100 before the last row
      my $max_pcnt = $Opts{'timings'} != 100 ? $Opts{'timings'} : 150;
      my $rows = 0;
      # foreach SA test, sort the values as required to get the list of percentiles 
      for (sort { $pertest_totals{$b} <=> $pertest_totals{$a} } keys %Timings) {
         # with no recorded total scan time, report 0% rather than
         # dying on a division by zero
         $pcnt = $time_total_actual ? ($pertest_totals{$_} / $time_total_actual) * 100 : 0;
         @sorted = sort { $a <=> $b } @{$Timings{$_}};
         @p = getpercentiles (@sorted, @percents);
                                                      # per-SA test running column subtotals:
         $col_subtotals[0] += $pcnt;                  #  - percentage of total time
         $col_subtotals[1] += $pertest_totals{$_};    #  - total time
         for my $col (1 .. @p) {
            $col_subtotals[$col+1] += $p[$col-1];     #  - percentiles time
         }
         if ($subtotal_pcnt < $max_pcnt) {
            $subtotal_pcnt += $pcnt;
            printf "%-25s %6.2f%% %11.3f" . " %9.3f" x scalar (@p) . "\n",
                     $_,                                 # key
                     $pcnt,                              # percent of total time
                     $pertest_totals{$_} / 1000,         # total time for this test
                     map {$_ / 1000} @p;                 # list of percentiles
            $rows++;
         }
      }
      # an ellipsis marks rows omitted by the percentage limit
      print "...\n"  if ($rows != scalar keys %Timings);

      print "$header_footer\n";

      # actual sum of total times reported by amavis
      @sorted = sort { $a <=> $b } @TimingsTotals;
      @p = getpercentiles (@sorted, @percents);
      printf "%-25s %6.2f%% %11.3f" . " %9.3f" x scalar (@p) . "\n",
               'Scan Time: Actual',
               100.0,
               $time_total_actual / 1000,
               map {$_ / 1000} @p;

      # hypothetical worst/best total times, computed by cherry picking the
      # best/worst times of each test, and computing percentiles from those values
      printf "%-25s %6.2f%% %11.3f" . " %9.3f" x @p . "\n",
         "Scan Time: Hypothetical",
         $col_subtotals[0],
         $time_total_hypo / 1000,
         map {$_ / 1000} @col_subtotals[2..$#col_subtotals];
   }
}

# Prints the details recorded in %StartInfo for the most recent amavis
# startup, provided the startinfo option is enabled and any startup
# details were collected.
#
sub printStartupInfoReport {

   return  unless ($Opts{'startinfo'} and keys %StartInfo);

   # Prints a label, left justified in 50 columns, followed by its value
   sub print2col($ $) {
      my ($label,$val) = @_;
      printf "%-50s %s\n", $label, $val;
   }

   # Prints a heading, followed by each group found under the given
   # %StartInfo key, and the modules within each group.  A module's value
   # is shown next to its name; a false value is shown as an empty string.
   sub print_modules ($ $) {
      my ($key, $label) = @_;
      print "    $label\n";
      foreach my $group (sort keys %{$StartInfo{$key}}) {
         print "        $group\n";
         foreach my $module (sort keys %{$StartInfo{$key}{$group}}) {
            print2col ("            " . $module, $StartInfo{$key}{$group}{$module} || "");
         }
      }
   }

   print "\n\nAmavis Startup\n";

   # each item below is printed only if it was seen in the log
   foreach my $item ([ "    Amavis", 'ampath' ], [ "        Version", 'amversion' ]) {
      my ($label, $field) = @$item;
      print2col ($label, $StartInfo{$field})             if (exists $StartInfo{$field});
   }

   foreach my $item ([ "        PID", 'pid' ], [ "        Socket", 'socket' ], [ "        TCP port", 'ip' ],
                     [ "        UID", 'uid' ], [ "        GID", 'gid' ]) {
      my ($label, $field) = @$item;
      print2col ($label, $StartInfo{'Server'}{$field})   if (exists $StartInfo{'Server'}{$field});
   }

   foreach my $item ([ "        Logging", 'Logging' ], [ "        Configuration Files", 'Configs' ],
                     [ "    SpamAssassin", 'sa_version' ], [ "    Database", 'db' ]) {
      my ($label, $field) = @$item;
      print2col ($label, $StartInfo{$field})             if (exists $StartInfo{$field});
   }

   print_modules('AVScanner', 'Antivirus scanners');
   print_modules('Code',      'Code, modules and external programs');
   print_modules('Decoders',  'Decoders');
}

# Prints a list of tree entries, one entry per line, each followed by its
# children.  Lines are indented according to the entry's LEVEL.
#
# Arg1 is a listref of entry hashes (keys TOTAL, LEVEL, DATA and CHILDREF,
#      plus DEBUG, which is printed in debug mode); the entries are
#      printed in bycount order
# Arg2 is the number of entries of this list to print before eliding the
#      remainder with "..."; a negative value means no limit
#
# Dies if the list holds anything other than a hash reference.
#
sub printTree($ $) {
   my ($listref, $topn) = @_;
   my $cutlength = $Opts{'max_report_width'} - 3;

   #print "listref: $listref, L1_items: $topn\n";

   foreach my $entry (sort bycount @$listref) {
      if (ref($entry) ne "HASH") {
         die "Unexpected entry in tree: $entry\n";
      }
      # the countdown reaching zero means the entry limit has been hit
      unless ($topn--) {
         print "     ...\n";
         last;
      }
      #print "LEVEL: $entry->{LEVEL}, TOTAL: $entry->{TOTAL}, HASH: $entry, DATA: $entry->{DATA}\n";

      my $rets = sprintf "%8d%s%s", $entry->{TOTAL}, '   ' x ($entry->{LEVEL} + 2),  $entry->{DATA};
      if ($Opts{'debug'}) {
         printf "%-130s %-60s\n", $rets, $entry->{DEBUG};
      }
      else {
         # unless detail exceeds 10, long lines are cut and marked with "..."
         $rets =~ s/^(.{$cutlength}).*$/$1.../o   if ($Opts{'detail'} <= 10);
         printf "%s\n", $rets;
      }

      # Descend only when the entry has a child list.  The list is tested
      # with defined, not by numeric comparison against undef.  Children
      # are never subject to the entry limit.
      printTree ($entry->{CHILDREF}, -1) if (defined $entry->{CHILDREF});
   }
}

# Sort comparator for tree entries, which are hash references compared by
# way of the sort variables $a and $b.  Entries are ordered by:
#    1. TOTAL, largest first
#    2. the IPv4 address that DATA starts with, if any, in address order
#    3. DATA, compared as a string
#
# XXX optimize this using packed default sorting.  Analysis shows speed isn't an issue though
sub bycount {
   my $re_IP_strict = qr/\b(25[0-5]|2[0-4]\d|[01]?\d{1,2})\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})\b/;

   # any warning raised by a comparison is reported along with the data being compared
   local $SIG{__WARN__} = sub { print "*** PLEASE REPORT:\n*** $_[0]*** Unexpected: \"$a->{DATA}\", \"$b->{DATA}\"\n" };

   # 1: larger totals sort first
   my $order = $b->{TOTAL} <=> $a->{TOTAL};
   return $order  if ($order);

   # 2: the four octets are packed into bytes, so that comparing the packed
   # strings orders the addresses numerically; a failed match supplies an
   # empty list of octets to pack
   my @octets_a = ($a->{DATA} =~ /^$re_IP_strict/o);
   my @octets_b = ($b->{DATA} =~ /^$re_IP_strict/o);
   $order = pack('C4' => @octets_a) cmp pack('C4' => @octets_b);
   return $order  if ($order);

   # 3: fall back to comparing the data itself
   return $a->{DATA} cmp $b->{DATA};
}

#
# Builds a tree of records from one of the multi-key %Counts hashes.
# 
# Parameters:
#    Hash:      A multi-key hash (received as a reference, by way of the
#               prototype), whose keys serve as category headings, and whose
#               leaf values are the tallies for that set of keys
#    Max level: The number of levels wanted for this hash; records are created
#               only for levels below both this limit and the global level limit
#    Level:     The current recursion level.  Call with 0.
#
# Returns the list (Total, Listref):
#    Total:   The cumulative total of the tallies found beneath the hash,
#             including those for which no record was created
#    Listref: A listref, where each item in the list is a record having:
#           DATA:      a string: a heading, or log data
#           TOTAL:     an integer: the tally of a leaf, or the subtotal of an item's children
#           LEVEL:     an integer: the level of this entry in the tree, starting at 0
#           CHILDREF:  a listref: the list of this node's children; undef for a leaf
#           DEBUG:     a string: present only in debug mode
#

sub buildTree(\% $ $) {
   my ($href, $max_level_item, $level) = @_; 
   my @records = ();
   my $total   = 0;

   # records are wanted at this level only while within both level limits
   my $level_wanted = ($level < $max_level_global and $max_level_item > $level);

   foreach my $key (sort keys %$href) {
      if (ref($href->{$key}) eq "HASH") {
         # a heading: its children are tallied, even if no record is kept
         my ($subtotal, $children) = buildTree (%{$href->{$key}}, $max_level_item, $level + 1);

         if ($level_wanted) {
            my $node = {
               DATA  => $key,
               TOTAL => $subtotal,
               LEVEL => $level,
            };
            if ($Opts{'debug'}) {
               $node->{DEBUG} = "L$level: Count: $subtotal, max_level_global: $max_level_global, max_level_item: $max_level_item";
            }
            $node->{CHILDREF} = $children;
            push @records, $node;
         }

         $total += $subtotal;
      }
      else {
         # a leaf: no record is kept for an empty key, but its tally still counts
         if ($key !~ /^$/ and $level_wanted) {
            my $leaf = {
               DATA  => $key,
               TOTAL => $href->{$key},
               LEVEL => $level,
               CHILDREF => undef,
            };
            if ($Opts{'debug'}) {
               $leaf->{DEBUG} = "L$level: Count: $href->{$key}, max_level_global: $max_level_global, max_level_item: $max_level_item";
            }
            push @records, $leaf;
         }
         $total += $href->{$key};
      }
   }

   return ($total, \@records);
}

# Converts configuration variables into a list of command line arguments.
#
# Arg1 is a hash (received as a reference, by way of the prototype) of
# variable names and values.  Only variables of the form
# ${progname_prefix}_KEYNAME are used: each one yields the argument
# --KEYNAME, followed by its value when the value is defined.
#
# Because logwatch lowercases all config file entries, KEYNAME is
# case-insensitive.
#
sub env_to_cmdline(\%) {
   my ($vars) = @_;
   my @args = ();

   while ( my ($name, $value) = each %$vars ) {
      # stripping the prefix also selects the variables that carry it
      next  unless ($name =~ s/^${progname_prefix}_//);

      push @args, "--$name";
      if (defined ($value)) {
         push @args, $value;
      }
   }
   return @args;
}

# Obtains the variables from a logwatch-style .conf file, for use
# in standalone mode.
#
# Arg1 is the name of the configuration file
#
# Returns a reference to an ENV-style hash of key/value pairs.  Only lines
# of the form  $${progname_prefix}_NAME = VALUE  are used; VALUE may be
# enclosed in double quotes.  Values are converted as follows:
#    no, false    0
#    yes, true    1
#    empty        undef, stored under the "no" variant of the variable name
#    otherwise    the value as written
#
# Dies if the file cannot be opened or closed.
#
sub get_vars_from_file($) {
   my $file = shift;
   my %hash;

   # The three-argument open takes the file name literally: no part of it
   # is interpreted as an open mode or a pipe.  The lexical handle is
   # private to this subroutine.
   open my $fh, '<', $file    or die "unable to open configuration file $file: $!";

   # lines are read into a lexical, leaving the caller's $_ untouched
   while (my $line = <$fh>) {
      chomp $line;
      next if ($line =~ /^\s*$/);   # ignore all whitespace lines
      next if ($line =~ /^\*/);     # ignore logwatch's *Service lines
      next if ($line =~ /^\s*#/);   # ignore comment lines
      if (my ($var,$val) = ($line =~ /^\s*\$(${progname_prefix}_[^=\s]+)\s*=\s*"?(.*?)"?$/o)) {
         #print "VAR: \"$var\", VAL: \"$val\"\n";
         if ($val =~ /^(?:no|false)$/i) {
            $hash{$var} = 0;
         } elsif ($val =~ /^(?:yes|true)$/i) {
            $hash{$var} = 1;
         } elsif ($val eq '') {
            # a variable having no value becomes its "no" prefixed variant
            $var =~ s/amavis_/amavis_no/;
            $hash{$var} = undef;
         } else {
            $hash{$var} = $val;
         }
      }
   }
   close $fh          or die "failed to close configuration handle for $file: $!";

   return \%hash;
}

# Getopt helper: reads the named configuration file, and places the
# settings found there at the front of @ARGV, as command line arguments.
#
# Arg1 is the option name (unused)
# Arg2 is the name of the configuration file
#
# If the file does not exist, is not readable, or is not a regular file,
# a message is printed and the program exits with status 2.
#
sub process_config_file($ $) {
   my ($option, $file) = @_;
   my  $message = undef;

   # stat returns an empty list upon failure.  The first field it returns
   # upon success is the device number, which does not indicate success,
   # so the list itself is tested.  The file tests below reuse the
   # information obtained by this stat, by way of the _ filehandle.
   my @fileinfo = stat ($file);
   if    (! @fileinfo) { $message = $!; }
   elsif (! -r _)      { $message = "Permission denied"; }
   elsif (-d _)        { $message = "Is a directory"; }
   elsif (! -f _)      { $message = "Not a regular file"; }

   if ($message) {
      print "Configuration file $file: $message\n";
      exit (2);
   }

   my $href = get_vars_from_file($file);
   unshift @ARGV, env_to_cmdline(%$href);
}

# Returns a list of percentile values, one per requested percentile,
# given a sorted array of numeric values.  Uses the formula:
#
# r = 1 + (p(n-1)/100) = i + d  (Excel method)
#
# r = rank
# p = desired percentile
# n = number of items
# i = integer part, d = decimal part
#
# The value for a rank falling between two items is interpolated linearly
# from those two items.
# 
# Arg1 is an array ref to the sorted series
# Arg2 is a list of percentiles to use

sub getpercentiles(\@ @) { 
   my ($series, @percentiles) = @_;

   my $last  = $#$series;
   my $count = $last + 1;
   my @values;

   foreach my $pcnt (@percentiles) {
      my $rank  = 1 + ($pcnt * ($count - 1) / 100.0);
      my $whole = int ($rank);         # integer part of the rank

      # Ranks are 1 based, and the series 0 based, so the item at rank i
      # is element i-1.  The lowest and highest ranks have no neighbor to
      # interpolate toward, and take the first or last item directly.
      push @values,
           $whole == 0      ? $series->[0]
         : $whole == $count ? $series->[$last]
         :                    $series->[$whole-1] + (($rank - $whole) * ($series->[$whole] - $series->[$whole-1]));
   }

   return @values;
}

# Returns a list of frequency distributions given an incrementally sorted
# set of scores, and a list of buckets.
#
# Each bucket is an upper limit: a score is counted in the first bucket
# whose limit it does not exceed.  Scores exceeding every limit are
# counted in the last bucket.
#
# Arg1 is an array ref to the sorted series
# Arg2 is a list of frequency buckets to use
#
# Returns one count per bucket, in ascending bucket order; the list is
# empty when no buckets are given.
sub getfrequency(\@ @) { 
   my ($aref,@blist) = @_;

   return ()  unless (@blist);

   # The buckets are numbers, so they must be sorted numerically: the
   # default string sort would place 10 before 5, causing scores to be
   # counted in the wrong bucket.
   my @limits = sort { $a <=> $b } @blist;
   my @vals   = ( 0 ) x @limits;
   my $bucket_index = 0;

   # The scores are in ascending order, so the bucket index only ever
   # advances; it stops at the last bucket, which takes all the rest.
   foreach my $score (@$aref) {
      $bucket_index++   while ($bucket_index < $#limits and $score > $limits[$bucket_index]);
      $vals[$bucket_index]++;
   }

   return @vals;
}

# Tallies a log line that no pattern matched, for the unmatched entries
# report.  In debug mode, the line is also printed at once.
#
# Arg1 is an identifier of the place where matching failed (shown only in
#      the debug output)
# Arg2 is the unmatched log line
#
sub inc_unmatched($ $) {
   my $id   = shift;
   my $line = shift;

   $UnmatchedList{$line} += 1;

   if ($Opts{'debug'}) {
      print "UNMATCHED($id): \"$line\"\n";
   }
}

# Returns the usage text: the general options, followed by one option
# line for each report section listed in @Sections.
#
# Arg1 is an optional message, which is placed ahead of the usage text
#
sub usage($) {
   my $ret = "";
   $ret = "@_\n"  if ($_[0]);

   $ret .= <<"END_USAGE";
Usage: $progname [ ARGUMENTS ] [logfile ...]

   ARGUMENTS can be one or more of options listed below.  Later options override earlier ones.
   Any argument may be abbreviated to an unambiguous length.  Input comes from named logfiles,
   or STDIN.

   --help                              print usage information
   --version                           print program version
   --config_file FILE                  use alternate configuration file FILE
   --debug                             provide debug output
   --syslog_name NAME                  use NAME as the syslog service name for the amavis process

   --detail LEVEL                      print LEVEL levels of detail (default 10)
   --max_report_width WIDTH            limit report width to WIDTH chars (default 100)
   --nodetail                          do not display detail report sections
   --nosummary                         do not display summary section

   --noscore_frequencies               disable spam score frequency report
   --score_frequencies "B1 [B2 ...]"   enable spam score frequency report, using buckets
   --score_frequencies "default"       specified with B1 [B2 ...] (range: real numbers), or using their
                                       internal default values when the keyword "default" is given
   --noscore_percentiles               disable spam score percentiles report
   --score_percentiles "P1 [P2 ...]"   enable spam score percentiles report, using percentiles
   --score_percentiles "default"       specified with P1 [P2 ...] (range: 0...100), or using their
                                       internal default values when the keyword "default" is given

   --nosarules                         disable SpamAssassin spam and ham rules hit reports
   --sarules "S,H"                     enable SpamAssassin spam and ham rules, showing the top S
   --sarules "default"                 enable SpamAssassin spam and ham rules, showing the top S
                                       spam and top H ham rules hit.  (Valid values: 0... , "all",
                                       or the keyword "default").

   --[no]startinfo                     show latest amavis startup details, if available

   --notimings                         disable the timings report (same as --timings 0)
   --timings PERCENT                   show top PERCENT percent of the timings report (range: 0...100)
   --timings_percentiles "P1 [P2 ...]" set timings report percentiles to P1 [P2 ...]  (range: 0...100)

     Each option below limits the LEVEL of detail shown in the detailed section of the report.
     Prefixing an option with "no" sets the LEVEL for that option to 0 (eg. --nospampassed is
     equivalent to --spampassed 0).

END_USAGE
   # layout-only entries (separators, blank lines and section markers)
   # have no corresponding option
   foreach my $var ( @Sections ) {
      next if ($var->[0] =~ /^.$/);
      next if ($var->[0] =~ /^\\n$/);
      next if ($var->[0] =~ /^__/);
      $ret .= sprintf "   --%-28s%s\n", "\L$var->[0]" . " LEVEL", "section: \"$var->[2]\"";
   }
   $ret .= "\n";
   return $ret;
}

# Prints the program name and version on STDOUT, and exits with status 0.
#
# Arg1 is an optional message, which is printed first
#
sub version($) {
   if ($_[0]) {
      print STDOUT "@_\n";
   }
   print STDOUT "$progname: $Version\n";

   exit 0;
}

# Removes perl's "at FILE line N." location traces from a message, such as:
#    at (eval 37) line 306, <GEN6> line 4.
#    at /usr/sbin/amavisd-maia line 2895, <GEN4> line 22.
#
# Arg1 is the message, which is modified in place
#
sub strip_trace($) {
   # repeat until no trace remains anywhere within the message
   1  while ($_[0] =~ s/ at .+ line \d+(?:, \<GEN\d+\> line \d+)?\.//);
}

# Disables every detail section and every supplemental report, so that
# one or more of them can then be enabled individually by subsequent
# command line options
#   eg. prog --nodetail --score_percentiles=default
#
sub zero_opts ( ) {
   foreach my $section ( @Sections ) {
      my $keyname = $section->[0];

      # layout-only entries (separators, blank lines and section markers)
      # have no option to set
      next  if ($keyname =~ /^.$/ or $keyname =~ /^\\n$/ or $keyname =~ /^__/);

      $Opts{lc $keyname} = 0;
   }

   # reports enabled by a value are disabled with undef ...
   @Opts{qw(score_percentiles score_frequencies sarules)} = ();

   # ... and those enabled by a number are disabled with 0
   @Opts{qw(timings startinfo)} = (0, 0);
}


# Getopt helper, sets an option in Opts hash to one of three
# values: its default, the specified value, or undef if the option
# was the "no" prefixed variant.
#
# Arg1 is the option name, as matched on the command line
# Arg2 is the option's value; the keyword "default", in any letter case,
#      selects the option's default value
#
# Dies if the value looks like another option (it starts with "--").
#
sub triway_opts ($ $) {
   my ($opt,$val) = @_;

   die "Option \"--${opt}\" requires an argument" if ($val =~ /^--/);

   if ($opt =~ s/^no//i) {
      $Opts{$opt} = undef;
   }
   # The value is compared as a string.  Were it used as a pattern, any
   # regex metacharacters it contains could match "default" by accident,
   # or abort the program with a pattern compilation error.
   elsif (lc ($val) eq 'default') {
      $Opts{$opt} = $Defaults{$opt};
   }
   else {
      $Opts{$opt} = $val;
   }
}

# Normal termination: reached once the top-level report code above has run
# (the subroutine definitions in between are not executed as statements).
exit(0);

# vi: shiftwidth=3 tabstop=3 syntax=perl et
