#!/usr/bin/perl -w

=head1 NAME

octo_extractor - Octopussy Logs Extractor program

=head1 SYNOPSIS

octo_extractor --device <device> --service <service> --taxonomy <taxonomy>
  --begin YYYYMMDDHHMM --end YYYYMMDDHHMM 
	[ --include '<regexp to include>' ] [ --exclude '<regexp to exclude>' ] 
	[ --pid_param <string> ] [ --output <outputfile> ]

=head1 DESCRIPTION

octo_extractor is the program used by the Octopussy Project to extract Logs

=cut

use strict;
no strict 'refs';
use Cache::SharedMemoryCache;
use Getopt::Long;
Getopt::Long::Configure('bundling');

use Octopussy;

# Program identity, used in the help banner, the syslog abort message
# and to build the PID file name.
my $PROG_NAME = "octo_extractor";
my $PROG_VERSION = Octopussy::Version();

# Command-line options (filled by GetOptions in the MAIN section).
my $help;
# --device and --service may be given several times.
my (@opt_devices, @opt_services) = ((), ());
my ($opt_taxonomy, $opt_begin, $opt_end, $pid_param, $output);
# --include and --exclude may be given several times.
my (@opt_include, @opt_exclude);
# Set in the MAIN section from Octopussy::PID_File(); undef until then.
my $pid_file = undef;
# NOTE(review): not referenced anywhere else in this file - confirm
# whether it is still needed.
my $dir_pid = Octopussy::Directory("running");
# Cache::SharedMemoryCache instance holding the "status_<pid>" progress
# entry; created in the MAIN section, undef until then.
my $shared_memory_cache = undef;

=head1 FUNCTIONS

=head2 String_List($fct, @args)

Calls function $fct with @args and returns the elements of the resulting
list joined by ", " as a single string

=cut
sub String_List
{
	# First argument is the function to call, the rest are its arguments.
	my $function = shift;
	my @params = @_;

	# $function may be a sub name (symbolic reference) or a code reference.
	no strict 'refs';
	my @elements = $function->(@params);

	return (join(", ", @elements));
}

=head2 Help()

Prints Help

=cut
sub Help()
{
	# Usage banner. The heredoc body is printed verbatim, so it must not
	# contain any Perl punctuation (a stray '";' used to be printed at the
	# end of the --begin/--end line).
	my $help_str = <<EOF;

$PROG_NAME (version $PROG_VERSION)

 Usage: $PROG_NAME --device <device> --service <service> --taxonomy <taxonomy>
        --begin YYYYMMDDHHMM --end YYYYMMDDHHMM
        [ --include '<regexp to include>' ] [ --exclude '<regexp to exclude>' ] 
        [ --pid_param <string> ] [ --output <outputfile> ]

EOF

	print $help_str;

	# Only the list of values for the first missing option is shown:
	# devices, then services of the given devices, then taxonomies.
	if (! @opt_devices)
	{
		print " Device list: " . String_List("Octopussy::Device::List") . "\n";
	}
	elsif (! @opt_services)
	{
		print " Service list: "
			. String_List("Octopussy::Device::Services", @opt_devices) . "\n";
	}
	elsif (!defined $opt_taxonomy)
	{
		print " Taxonomy list: "
			. String_List("Octopussy::Taxonomy::List_And_Any") . "\n";
	}
	print "\n";

	# Help never returns to the caller.
	exit();
}

=head2 Progress($msg, $num, $nb_match)

Sets progress status

=cut
sub Progress
{
	my ($label, $position, $matched) = @_;

	# Status line: translated message followed by "[position] [matches]"
	my $status_line = AAT::Translation($label) . " [$position] [$matched]";

	# The cache entry is keyed by the pid of this process
	$shared_memory_cache->set("status_$$", $status_line);
}

=head2 Get_Messages_To_Parse($services, $taxonomy)

Returns list of Messages to parse

=cut
sub Get_Messages_To_Parse($$)
{
	my $service_list = shift;
	my $taxo = shift;

	# Only services and taxonomy are provided;
	# the four remaining arguments of Parse_List are explicitly undefined.
	my @messages = Octopussy::Message::Parse_List(
		$service_list, $taxo, undef, undef, undef, undef);

	return (@messages);
}

=head2 Get_TimePeriod_Files($devices, $services, $begin, $end)

Returns list of Files for Devices $devices, Services $services 
and Period $begin-$end

=cut
sub Get_TimePeriod_Files($$$$)
{
	my ($devices, $services, $begin, $end) = @_;

	# Timestamps are expected as YYYYMMDDHHMM
	my $re_timestamp = qr/^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})$/;
	my @fields = qw(year month day hour min);

	# A match in list context returns the 5 captured fields, or an empty
	# list when the timestamp is malformed: every field is then left
	# undefined. This replaces the former 'my (...) = ... if (...)'
	# construct, whose behaviour is documented as undefined in perlsyn.
	my (%start, %finish);
	@start{@fields} = (defined $begin) ? ($begin =~ $re_timestamp) : ();
	@finish{@fields} = (defined $end) ? ($end =~ $re_timestamp) : ();

	# Returns whatever Minutes_Hash returns; Print_Logs uses the first value
	# as a hashref (minute => arrayref of files) and the second as the
	# total number of files.
	my ($files, $nb_files) =
		Octopussy::Logs::Minutes_Hash($devices, $services, \%start, \%finish);

	return ($files, $nb_files);
}

=head2 Print_Logs($devices, $services, $taxo, $begin, $end, $re_incl, $re_excl)

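Reads the log files of Devices $devices and Services $services for the
period $begin-$end and prints, minute by minute and sorted, the lines that
match every $re_incl regexp and none of the $re_excl regexps, either to
the --output file or to STDOUT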

=cut
sub Print_Logs($$$$$$$)
{
	my ($devices, $services, $taxo, $begin, $end, $re_incl, $re_excl) = @_;
	my $time = time();

	# Compile every non-empty include/exclude pattern once
	my @includes =
		map { qr/$_/ } grep { AAT::NOT_NULL($_) } AAT::ARRAY($re_incl);
	my @excludes =
		map { qr/$_/ } grep { AAT::NOT_NULL($_) } AAT::ARRAY($re_excl);

	Progress("_MSG_EXTRACT_PROGRESS_LISTING_FILES", "1/1", 0);
	my ($files, $total)
		= Get_TimePeriod_Files($devices, $services, $begin, $end);

	# Destination: the --output file when given, STDOUT otherwise.
	# A fresh lexical is used for open() so that STDOUT is never reopened.
	my $out_fh;
	if (AAT::NOT_NULL($output))
	{
		if (!open($out_fh, '>', $output))
		{
			# Without this check every extracted line was silently lost
			print STDERR "Unable to open file '$output': $!\n";
			AAT::Syslog("octo_extractor", "UNABLE_OPEN_FILE", $output);
			return;
		}
	}
	else
		{ $out_fh = \*STDOUT; }

	# When both first service and taxonomy are '-ANY-', lines are only
	# checked against include/exclude patterns; otherwise they must also
	# match one of the messages of the requested services/taxonomy.
	my $first_service = (defined $services->[0] ? $services->[0] : "");
	my $extract_any = (($first_service =~ /-ANY-/i)
		&& (defined $taxo) && ($taxo =~ /-ANY-/i));
	my @msg_to_parse =
		($extract_any ? () : Get_Messages_To_Parse($services, $taxo));

	my $nb_match = 0;
	my $i = 1;
	foreach my $min (sort keys %{$files})
	{
		my @logs = ();
		foreach my $f (@{$files->{$min}})
		{
			Progress("_MSG_EXTRACT_PROGRESS_DATA", "$i/$total", $nb_match);
			# List form of pipe open: no shell involved, so the file name
			# cannot be interpreted as shell syntax
			if (defined open(my $in_fh, '-|', 'zcat', $f))
			{
				# NOTE(review): in message mode, files stored under an
				# 'Unknown/YYYY/MM/DD' path are kept entirely, without
				# applying include/exclude patterns (historical behaviour,
				# confirm it is intended).
				my $keep_all = ((!$extract_any)
					&& ($f =~ /Unknown\/\d{4}\/\d{2}\/\d{2}/));
				while (my $line = <$in_fh>)
				{
					if (!$keep_all)
					{
						next	if ((!$extract_any)
							&& (!_Line_Matches_Message($line, \@msg_to_parse)));
						next	if (!_Line_Passes_Filters($line,
							\@includes, \@excludes));
					}
					push(@logs, $line);
					$nb_match++;
				}
				close($in_fh);
			}
			else
			{
				print "Unable to open file '$f'\n";
				AAT::Syslog("octo_extractor", "UNABLE_OPEN_FILE", $f);
			}
			# Count the file even when it could not be opened, so that
			# the progress status always reaches $total
			$i++;
		}
		# Lines of one minute are printed sorted
		foreach my $log (sort @logs)
			{ print {$out_fh} $log; }
	}
	if (AAT::NOT_NULL($output))
	{
		# Buffered write errors only show up when closing the file
		close($out_fh)
			or print STDERR "Unable to close file '$output': $!\n";
	}

	AAT::Syslog("octo_extractor", "LOG_SEARCH", join(",", @{$devices}),
		join(",", @{$services}), "${begin}-${end}", time() - $time);
}

=head2 _Line_Passes_Filters($line, \@includes, \@excludes)

Returns 1 if $line matches every compiled regexp of @includes
and none of @excludes, 0 otherwise

=cut
sub _Line_Passes_Filters
{
	my ($line, $includes, $excludes) = @_;

	foreach my $inc (@{$includes})
		{ return (0)	if ($line !~ $inc); }
	foreach my $excl (@{$excludes})
		{ return (0)	if ($line =~ $excl); }

	return (1);
}

=head2 _Line_Matches_Message($line, \@messages)

Returns 1 if $line matches the 're' regexp of at least one message
of @messages, 0 otherwise

=cut
sub _Line_Matches_Message
{
	my ($line, $messages) = @_;

	foreach my $msg (@{$messages})
		{ return (1)	if ($line =~ $msg->{re}); }

	return (0);
}

=head2 End()

Ends Extraction

=cut
sub End
{
	AAT::Syslog($PROG_NAME, "Logs Extraction Aborted !");

	# This sub is installed as USR2 handler before the PID file name and
	# the shared memory cache are set: only clean up what already exists,
	# otherwise an early signal would die on an undefined object instead
	# of exiting cleanly.
	unlink($pid_file)	if (defined $pid_file);
	$shared_memory_cache->remove("status_$$")
		if (defined $shared_memory_cache);

	exit();
}

#
# MAIN
#
# USR2 aborts the extraction: End() logs the abort, cleans up and exits.
$SIG{USR2} = \&End;

my $status = GetOptions(
	"h" => \$help, "help" => \$help,
	"device=s" => \@opt_devices, "service=s" => \@opt_services,
	"taxonomy=s" => \$opt_taxonomy,
	"begin=s" => \$opt_begin, "end=s" => \$opt_end,
	"include=s" => \@opt_include, "exclude=s" => \@opt_exclude,
	"pid_param=s" => \$pid_param, "output=s" => \$output );

# Devices, services and both period bounds are mandatory.
# NOTE(review): --taxonomy is not enforced here although the usage text
# does not mark it as optional - confirm.
Help()	if ((! $status) || ($help)
	|| (! @opt_devices) || (! @opt_services)
	|| (!defined $opt_begin) || (!defined $opt_end));

# Cache used to publish the "status_<pid>" progress entry.
# 'die' is used because Carp is not imported in this file: the former
# croak() call would itself have failed with "Undefined subroutine".
$shared_memory_cache =
	Cache::SharedMemoryCache->new( { namespace => "octo_extractor",
		default_expires_in => "1 hour" } )
	or die("Couldn't instantiate SharedMemoryCache");

# --pid_param, when given, is appended to the name passed to PID_File
my $pid_name = $PROG_NAME . (defined $pid_param ? "_$pid_param" : "");
$pid_file = Octopussy::PID_File($pid_name);

Print_Logs(\@opt_devices, \@opt_services, $opt_taxonomy,
	$opt_begin, $opt_end, \@opt_include, \@opt_exclude);

# Normal termination: same cleanup as End()
unlink($pid_file);
$shared_memory_cache->remove("status_$$");

=head1 AUTHOR

Sebastien Thebert <octo.devel@gmail.com>

=head1 SEE ALSO

octo_dispatcher, octo_parser, octo_uparser, octo_reporter, octo_scheduler

=cut
