#! /usr/bin/perl
#
#  Purpose:
#  FlowTracker_Collector runs periodically to collect 5-minute values of
#  netflow data, according to the specified tracking filter.
#
#  Description:
#  FlowTracker_Collector is used to collect 5-minute periods of netflow
#  data which is stored by flow-tools. The script is initiated by the user
#  from a command line, appending an '&' to keep it in the background. For 
#  each Tracking set created by the user and represented by a filter file 
#  in the FlowTracker_Filter directory, FlowTracker_Collector extracts the
#  last 5 minutes worth of data flows that match against the filter. The
#  resulting value is stored into the appropriate RRDtool data base.
#  FlowTracker_Collector will collect netflow totals according to the pre-
#  defined filters. It will collect this data for 5-minute periods that fall
#  on even 5-minute boundaries (e.g., 00:05, 00:10, 00:15, etc.) regardless
#  of when it is actually started. It collects data from approximately 30 
#  minutes previous to allow for long flows to be exported into flow-tools.
#  When it is started up, FlowTracker_Collector will check for the last 
#  collect period, and if enough time has not passed, it will go to sleep
#  so that it does not collect twice for the same period.
#
#  Input arguments:
#  Name                 Description
#  -----------------------------------------------------------------------
#  collection_period    Determines the length between collections (e.g., 5 min.)
#  
#  Input files:
#  Name                 Description
#  -----------------------------------------------------------------------
#  Filter file          Used to control the extraction of netflow data
#  RRDtool file         This is the RRDtool file created for this tracking
#
#  Modification history:
#  Author       Date            Vers.   Description
#  -----------------------------------------------------------------------
#  J. Loiacono  07/04/2006      3.0     Original version.
#  J. Loiacono  12/25/2006      3.1     Skip Archived trackings, log error
#  J. Loiacono  02/22/2007      3.2     Now uses single concatenation per device
#  J. Loiacono  12/07/2007      3.3     Alerts, flexible logging
#
#$Author$
#$Date$
#$Header$
#
###########################################################################
#
#               BEGIN EXECUTABLE STATEMENTS
#
 
use FlowViewer_Configuration; 
use FlowViewer_Utilities; 
use lib $cgi_bin_directory; 
use File::stat;

$just_started_up = 1;

$a = 0; while ($a == 0) {

	# Open the collector log (append mode) when any logging level is enabled,
	# and the debug file (rewritten on every pass) when tracker debugging is
	# on. A failed open is reported on STDERR rather than silently ignored;
	# the collection pass itself still runs.

	if (($log_collector_short eq "Y") || ($log_collector_med eq "Y") || ($log_collector_long eq "Y")) {
		open (LOG, ">>", "$log_directory/FlowTracker_Collector.log")
			or warn "FlowTracker_Collector: cannot append to $log_directory/FlowTracker_Collector.log: $!\n";
	}
	if ($debug_tracker eq "Y") {
		open (DEBUG, ">", "$work_directory/DEBUG_TRACKER")
			or warn "FlowTracker_Collector: cannot write $work_directory/DEBUG_TRACKER: $!\n";
	}

	# Timestamp of this pass, used in the log messages

	($sec,$min,$hr,$date,$mnth,$yr) = (localtime(time))[0..5];
	$current_date = format_date($sec,$min,$hr,$date,$mnth,$yr);

	# Work out the period to collect: it ends on a collection_period boundary,
	# collection_offset seconds back from now, and lasts one collection_period.

	$start_collect_time  = time;
	$seconds_past_period = $start_collect_time % $collection_period;
	$period_end          = $start_collect_time - $collection_offset - $seconds_past_period;
	$period_start        = $period_end - $collection_period;

	# Month-day ("MMDD") and seconds-since-midnight of each period edge, in local time

	($sec,$min,$hr,$date,$mnth) = (localtime($period_start))[0..4];
	$period_start_md = sprintf("%02d%02d", $mnth + 1, $date);
	$start_secs      = ($hr * 60 + $min) * 60 + $sec;

	($sec,$min,$hr,$date,$mnth) = (localtime($period_end))[0..4];
	$period_end_md = sprintf("%02d%02d", $mnth + 1, $date);
	$end_secs      = ($hr * 60 + $min) * 60 + $sec;

	$start_epoch = timelocal(localtime($period_start));
	$end_epoch   = timelocal(localtime($period_end));

	# Period edges in the form used by the flow-tools filter, with the
	# seconds field forced to "00"

	$start_flows = &flow_date_time($start_epoch,"LOCAL");
	$end_flows   = &flow_date_time($end_epoch,"LOCAL");

	$start_flows = join(":", (split(/:/,$start_flows))[0,1], "00");
	$end_flows   = join(":", (split(/:/,$end_flows))[0,1], "00");

	# Time window handed to flow-cat when concatenating flow-tools data files

	$cat_start_epoch = $start_epoch - $flow_file_length - 61;
	$cat_end_epoch   = $end_epoch + $flow_capture_interval + 1 - 600;
	$cat_start       = epoch_to_date($cat_start_epoch,"LOCAL");
	$cat_end         = epoch_to_date($cat_end_epoch,"LOCAL");

	# Break both window edges into date, time, month, day and year parts

	($cat_start_date,$cat_start_time)   = split(/ /,$cat_start);
	($cat_end_date,$cat_end_time)       = split(/ /,$cat_end);
	($start_month,$start_day,$start_yr) = split(/\//,$cat_start_date);
	($end_month,$end_day,$end_yr)       = split(/\//,$cat_end_date);

	# Announce the pass in the log: one line for short/medium logging, a
	# multi-line summary for long logging.

	if (($log_collector_short eq "Y") || ($log_collector_med eq "Y")) {
		print LOG "\nAt $current_date started next collection.  Period: $start_flows to $end_flows\n";
	} elsif ($log_collector_long eq "Y") {
		print LOG "\nStarting next collection loop: $current_date\n",
		          "             start_flows: $start_flows\n",
		          "               end_flows: $end_flows\n",
		          "       Collection Period: $collection_period\n\n";
	}

	# Per-pass counters, and the record of which concatenation files have
	# already been built during this pass

	$num_trackings = $num_zero = $num_non_zero = 0;
	%created_concat = ();

	while ($existing_filter = <$filter_directory/*>) {
		
		# Reset everything that is read from the filter file, so that a file
		# which lacks one of these fields cannot inherit the value left behind
		# by the previously processed tracking.

		$alert_threshold     = 0;
		$alert_frequency     = "";
		$alert_destination   = "";
		$alert_last_notified = "";
		$device_name         = "";
		$exporter            = "";
		$tracking_label      = "";

		# Leading flow-cat options; the data directories are appended further on

		$concatenate_parameters = "-a -t \"$cat_start\" -T \"$cat_end\" ";

		# Tracking name and suffix come from the filter's file name

		$filter_suffix = $existing_filter;
		$filter_suffix =~ s#.*/##;
		($tracking_file,$suffix) = split(/\./,$filter_suffix);

		# Archived trackings and group files are not collected

		if ($suffix eq "archive") { next; }
		if ($suffix eq "grp")     { next; }

		# Open the filter only once we know it will be processed; skip the
		# tracking (with a message on STDERR) if it cannot be read.

		unless (open (EXISTING, "<", $existing_filter)) {
			warn "FlowTracker_Collector: cannot read filter $existing_filter: $!\n";
			next;
		}

		$rrdtool_file   = "$rrdtool_directory/$tracking_file.rrd";

		# If this is a quick restart (within collection period), avoid trying to update an existing period.
		# On the first pass only, "rrdtool info" is asked for the RRD's last_update
		# time; when the period about to be collected does not end after it, the
		# tracking is skipped. The output is read straight from a pipe (list-form
		# open, so no shell parses the file name) rather than through the
		# FlowTracker_Collector_info work file.

		if ($just_started_up) {
			if (open (INFO, "-|", "$rrdtool_bin_directory/rrdtool", "info", $rrdtool_file)) {
				while (<INFO>) {
					chomp;
					next unless (substr($_,0,11) eq "last_update");
					($lead,$last_update) = split(/ = /);
					if ($period_end <= $last_update) {
						$skip = 1;
						if (($log_collector_short eq "Y") || ($log_collector_med eq "Y") || ($log_collector_long eq "Y")) {
							print LOG "Just starting (and skipping) for: $tracking_file\n";
						}
						last;
					}
				}
				close (INFO);
			} else {
				warn "FlowTracker_Collector: cannot run rrdtool info on $rrdtool_file: $!\n";
			}
		}

		if ($skip) { $skip = 0; next; }

		# Extract information for this tracking and modify flow-tools filter for this time period.
		# " input: <field>: <value>" lines carry the tracking's settings and are
		# not copied; the two placeholder "permit" lines are rewritten with this
		# period's start and end; every other line is copied unchanged into the
		# work filter used for this run.

		unless (open (FILTER, ">", "$work_directory/FlowTracker_Collector_filter")) {
			# Without a fresh work filter the previous tracking's filter would be used
			warn "FlowTracker_Collector: cannot write $work_directory/FlowTracker_Collector_filter: $!\n";
			close (EXISTING);
			next;
		}

		$num_trackings++;

		$sampling_multiplier = "";

		# Settings recognized in the filter file, and the variable each one fills
		%input_target = (
			"device_name"         => \$device_name,
			"exporter"            => \$exporter,
			"tracking_label"      => \$tracking_label,
			"sampling_multiplier" => \$sampling_multiplier,
			"alert_threshold"     => \$alert_threshold,
			"alert_frequency"     => \$alert_frequency,
			"alert_destination"   => \$alert_destination,
			"alert_last_notified" => \$alert_last_notified,
		);

		while (<EXISTING>) {
			chomp;		# chomp, not chop: a final line without a newline keeps its last character
			if (substr($_,0,8) eq " input: ") {
				# Limit of 3 keeps a value that itself contains ": " in one piece
				($input,$field,$field_value) = split(/: /,$_,3);
				if (exists $input_target{$field}) { ${ $input_target{$field} } = $field_value; }
			}
			elsif (/permit ge January 1, 2000 00:00:00/) {
				print FILTER "  permit ge $start_flows\n";
			}
			elsif (/permit lt January 1, 2000 00:00:00/) {
				print FILTER "  permit lt $end_flows\n";
			}
			else {
				print FILTER "$_\n";
			}
		}

		close (FILTER);
		close (EXISTING);
	
		# Append to the flow-cat parameters the data directory for each day
		# touched by the concatenation window. When the window stays within one
		# day only the directory for the window's end is used; otherwise the
		# window is walked a day at a time from its start.

		@cat_day_epochs = ();
		if ($start_day ne $end_day) {
			for ($i=0;$i<31;$i++) {
				$cat_day_epoch = $cat_start_epoch + $i*86400;
				if ($cat_day_epoch > $cat_end_epoch + 86400) { last; }
				push (@cat_day_epochs, $cat_day_epoch);
			}
		}
		else {
			@cat_day_epochs = ($cat_end_epoch);
		}

		# Trackings defined by exporter read from the exporter directory, all
		# others from the named device's directory

		if ($exporter ne "") {
			$cat_base_directory = "$exporter_directory";
		} else {
			$cat_base_directory = "$flow_data_directory/$device_name";
		}

		%cat_directory_seen = ();

		foreach $cat_day_epoch (@cat_day_epochs) {

			($cat_date,$cat_mnth,$cat_yr) = (localtime($cat_day_epoch))[3,4,5];
			$cat_yr  += 1900;
			$cat_mnth = sprintf("%02d", $cat_mnth + 1);
			$cat_date = sprintf("%02d", $cat_date);

			$cat_ym  = "$cat_yr-$cat_mnth";
			$cat_ymd = "$cat_ym-$cat_date";

			# $N selects the sub-directory layout of the stored flow files;
			# any other value means the files sit directly in the base directory

			$cat_directory = $cat_base_directory;
			if    ($N == -3) { $cat_directory .= "/$cat_yr/$cat_ym/$cat_ymd"; }
			elsif ($N == -2) { $cat_directory .= "/$cat_ym/$cat_ymd"; }
			elsif ($N == -1) { $cat_directory .= "/$cat_ymd"; }
			elsif ($N == 1)  { $cat_directory .= "/$cat_yr"; }
			elsif ($N == 2)  { $cat_directory .= "/$cat_yr/$cat_ym"; }
			elsif ($N == 3)  { $cat_directory .= "/$cat_yr/$cat_ym/$cat_ymd"; }

			# With a layout that does not split by day, consecutive days map to
			# the same directory; hand it to flow-cat only once (listing it twice
			# would presumably make flow-cat read the same files twice).

			if ($cat_directory_seen{$cat_directory}++) { next; }

			$concatenate_parameters .= "$cat_directory ";
		}
	
		# Set up the flow-tools complete command to generate filtered data for the 5-minute period

		$flowcat_command     = "$flow_bin_directory/flow-cat" . " $concatenate_parameters";
		$flownfilter_command = "$flow_bin_directory/flow-nfilter -f $work_directory/FlowTracker_Collector_filter -FFlow_Filter";
		$flowprint_command   = "$flow_bin_directory/flow-print -f5 >$work_directory/FlowTracker_Collector_output";

		# If using previously generated concatenation files, create one if not already created.
		# One concatenation file is kept per device (or per exporter when the
		# tracking has no device name) and is shared by every tracking of that
		# source during this pass.

		if ($use_existing_concats eq "Y") {

			$concat_key  = ($device_name ne "") ? $device_name : $exporter;
			$concat_file = "$work_directory/CONCAT_$concat_key";

			if (($concat_key ne "") && (!$created_concat{$concat_key})) {
				$create_concat_command = "$flowcat_command > $concat_file";
				system($create_concat_command);

				# stat() from File::stat returns undef when the file is missing;
				# treat that like an empty file instead of dying on ->size
				$concat_stat = stat($concat_file);
				$file_size   = $concat_stat ? $concat_stat->size : 0;

				# An empty result is not remembered, so the next tracking of
				# this source tries the concatenation again
				if ($file_size != 0) { $created_concat{$concat_key} = 1; }
			}

			# Always build the command for THIS tracking's source, so that a
			# command left over from an earlier tracking is never re-run
			$flow_run = "$flownfilter_command < $concat_file | $flowprint_command";

		} else {
			$flow_run = "$flowcat_command | $flownfilter_command | $flowprint_command";
		}

		if ($debug_tracker eq "Y") { print DEBUG "\n$flow_run\n\n"; }

		system($flow_run);
	
		$period_bits         = 0;
		$total_bytes         = 0;
		$total_overlap_bytes = 0;
		$total_flow_bytes    = 0;

		# Flow timestamps carry only a month-day ("MMDD") stamp. Map the stamps of
		# the day before, the day of, and the day after the period start to day
		# offsets -1, 0, +1. Looking the stamp up (instead of subtracting MMDD
		# values as numbers) keeps the offsets right across month and year ends.
		# The lookup is anchored near local noon of the period's day so that
		# stepping by 86400 seconds always lands on the adjacent calendar day.

		$period_noon   = $period_start - $start_secs + 43200;
		%day_offset_of = ();
		foreach $day_offset (-1, 0, 1) {
			@day_parts = localtime($period_noon + $day_offset*86400);
			$day_offset_of{sprintf("%02d%02d", $day_parts[4] + 1, $day_parts[3])} = $day_offset;
		}

		# Parse through all flows that matched the filter, adding bits if part of flow within period

		if (open (FLOWS, "<", "$work_directory/FlowTracker_Collector_output")) {

			while (<FLOWS>) {

				# Flow records start with a digit; skip anything else
				if (!/^[0-9]/) { next; }

				($s_time,$e_time,$sif,$sip,$sp,$dif,$dip,$dp,$p,$fl,$pkt,$oct) = split(/\s+/,$_);
				if ($sampling_multiplier > 1) { $oct *= $sampling_multiplier; }
				$total_bytes += $oct;

				# Timestamps are split as <monthday>.<hh:mm:ss>.<msec>
				($smd,$s_tm,$s_ms) = split(/\./,$s_time);
				($emd,$e_tm,$e_ms) = split(/\./,$e_time);

				($shr,$smn,$ssc) = split(/:/,$s_tm);
				($ehr,$emn,$esc) = split(/:/,$e_tm);

				# Seconds since midnight, including the millisecond part
				$s_clock = 3600*$shr + 60*$smn + $ssc + ($s_ms/1000);
				$e_clock = 3600*$ehr + 60*$emn + $esc + ($e_ms/1000);

				# Determine flow time length (a flow whose end stamp is on another
				# day is taken to end on the day after it started)

				if ($smd eq $emd) {
					$flow_length = $e_clock - $s_clock;
				} else {
					$flow_length = $e_clock + (86400 - $s_clock);
				}
				if ($flow_length <= 0) { $flow_length = 0.001; }

				# Flow start and end, in seconds relative to the start of the period.
				# A flow stamped more than a day away from the period cannot be
				# placed, so it is left out rather than reusing the previous flow's
				# offsets.

				$s_day = $day_offset_of{$smd};
				$e_day = $day_offset_of{$emd};
				if (!defined($s_day) || !defined($e_day)) { next; }

				$start_delta = $s_day*86400 + $s_clock - $start_secs;
				$end_delta   = $e_day*86400 + $e_clock - $start_secs;

				# Exclude flows totally outside of the period

				if ($start_delta >= $collection_period) { next; }
				if ($end_delta   <= 0)   { next; }

				# Compute portion of this flow's bits that are within the period:
				# all of it when the flow lies inside the period, otherwise the
				# share of its octets proportional to the overlapping time.

				if (($start_delta >= 0) && ($end_delta < $collection_period)) {
					$flow_octs = $oct;
				} else {
					$overlap_start = ($start_delta < 0) ? 0 : $start_delta;
					$overlap_end   = ($end_delta < $collection_period) ? $end_delta : $collection_period;
					$flow_octs = int ((($overlap_end - $overlap_start)/$flow_length) * $oct);
				}

				$period_bits         += 8 * $flow_octs;
				$total_overlap_bytes += $flow_octs;
			}

			close (FLOWS);

		} else {
			# No readable output: the period is recorded as zero, as before
			warn "FlowTracker_Collector: cannot read $work_directory/FlowTracker_Collector_output: $!\n";
		}

		# Get a per-second average for the 5 minute period

		$collection_period_avg = int( $period_bits / $collection_period );

		# Update the appropriate RRD file

		$rrdtool_command = "$rrdtool_bin_directory/rrdtool update $rrdtool_file $period_end:$collection_period_avg";
		system($rrdtool_command);
	
		# If user has requested alerts, notify if threshold exceeded.
		# A positive threshold alerts when the period average rises above it; a
		# negative threshold alerts when the average falls below its absolute value.

		if ((($alert_threshold > 0) && ($collection_period_avg > $alert_threshold)) ||
		    (($alert_threshold < 0) && ($collection_period_avg < (-1 * $alert_threshold)))) {

			$time_since_last = $period_end - $alert_last_notified;

			$email_alert = 0;
			if ($alert_frequency eq "eachtime") {
				$alert_frequency_out = "Each Occurence";
				$email_alert = 1;
			}
			if (($alert_frequency eq "daily") && ($time_since_last >= 86400)) {
				$alert_frequency_out = "Once a Day";
				$email_alert = 1;
			}

			if ($email_alert) {

				if ($alert_threshold < 0) {
					$action = "not met";
					$threshold_out = -1 * $alert_threshold;
				} else {
					$action = "exceeded";
					$threshold_out = $alert_threshold;
				}

				$alert_subject = "FlowTracker threshold $action: $tracking_file";
				$tracking_link = "$FlowViewer_service://$FlowViewer_server$tracker_short/$tracking_file/index.html";
				$tracking_label_out = $tracking_label;
				$tracking_label_out =~ s/ /~/g;
				$revise_link = "$FlowViewer_service://$FlowViewer_server$cgi_bin_short/FlowTracker_Main.cgi?Solicit^$tracking_label_out";

				# Run mail directly (list-form pipe) so that neither the subject nor
				# the addresses are parsed by a shell. Whitespace-separated addresses
				# become separate arguments, as they did on the shell command line.

				@alert_recipients = split(' ', $alert_destination);
				$mail_started = 0;

				if (!@alert_recipients) {
					warn "FlowTracker_Collector: no alert destination configured for $tracking_file\n";
				} elsif (open (MAIL, "|-", "mail", "-s", $alert_subject, @alert_recipients)) {
					$mail_started = 1;
					print MAIL "\nThe flow-rate of the $tracking_file FlowTracking";
					print MAIL " has $action the configured Alert Threshold.\n\n";
					print MAIL "Start of Period: $start_flows\n";
					print MAIL "  End of Period: $end_flows\n";
					print MAIL "      Threshold: $threshold_out\n";
					print MAIL " Observed Value: $collection_period_avg\n";
					print MAIL "\nSee: $tracking_link\n";
					print MAIL "\nCurrent Alert Frequency: \'$alert_frequency_out\'. To turn off these emails, set the Alert Frequency to \'No Notification\'.\n";
					print MAIL "\nSee: $revise_link\n";
					close (MAIL)
						or warn "FlowTracker_Collector: mail command reported a failure for $tracking_file\n";
				} else {
					warn "FlowTracker_Collector: cannot start mail for $tracking_file: $!\n";
				}

				# Update the Alert Last Notified field in the Filter file.
				# Done only when a notification was actually started, and the filter
				# is replaced only after the rewritten copy was completely written, so
				# a leftover temp.fil can never be copied over this tracking's filter.

				if ($mail_started) {
					if (!open (EXISTING, "<", $existing_filter)) {
						warn "FlowTracker_Collector: cannot read filter $existing_filter: $!\n";
					} elsif (!open (TEMP, ">", "$work_directory/temp.fil")) {
						warn "FlowTracker_Collector: cannot write $work_directory/temp.fil: $!\n";
						close (EXISTING);
					} else {
						while (<EXISTING>) {
							$key = substr($_,0,8);
							if ($key eq " input: ") {
								($input,$field,$field_value) = split(/: /);
								if ($field eq "alert_last_notified") {
									print TEMP " input: alert_last_notified: $period_end\n";
								} else { print TEMP $_; }
							} else { print TEMP $_; }
						}
						close (EXISTING);

						if (close (TEMP)) {
							system("cp", "$work_directory/temp.fil", $existing_filter);
							chmod $filter_file_perms, $existing_filter;
						} else {
							warn "FlowTracker_Collector: error writing $work_directory/temp.fil: $!\n";
						}
					}
				}
			}
		}

		# Tally this tracking as zero or non-zero for the end-of-pass summary

		if ($collection_period_avg == 0) { $num_zero++; } else { $num_non_zero++; }

		# Medium logging gets one line per tracking; long logging gets the details

		if ($log_collector_med eq "Y") {
			printf LOG "%-40s %-15s\n", $tracking_label, $collection_period_avg;
		} elsif ($log_collector_long eq "Y") {
			print LOG "                     For: $tracking_label\n",
			          "            RRDtool File: $rrdtool_file\n",
			          "    Bytes from all Flows: $total_bytes\n",
			          "Bytes overlapping Period: $total_overlap_bytes\n",
			          "          Bits in Period: $period_bits\n",
			          "        CollectPeriodAvg: $collection_period_avg\n",
			          "                  Update: $period_end : $collection_period_avg\n",
			          "\n";
		}
	}

	# Remove the concatenation files built during this pass (not done on the
	# first pass after start-up). The file test -e does not expand wildcards,
	# so the matching files are listed with glob() and removed directly.

	if (!$just_started_up) {
		@concat_leftovers = glob("$work_directory/CONCAT*");
		if (@concat_leftovers) { unlink (@concat_leftovers); }
	}

	# The first pass is over: later passes no longer need the quick-restart check

	$just_started_up = 0;

	$end_collect_time = time;
	$loop_time = $end_collect_time - $start_collect_time;

	($sec,$min,$hr,$date,$mnth,$yr,$day,$yr_date,$DST) = localtime(time);
	$current_date = format_date($sec,$min,$hr,$date,$mnth,$yr);

	if (($log_collector_short eq "Y") || ($log_collector_med eq "Y") || ($log_collector_long eq "Y")) {
		print LOG "$num_zero trackings had a zero value. $num_non_zero trackings had a positive value.\n";
		print LOG "At $current_date finished this loop. Update period: $period_end  $num_trackings Trackings. Loop took: $loop_time seconds\n";
	}

	close (LOG);
	close (DEBUG);

	# Sleep for whatever is left of the collection period. When the pass took
	# longer than a whole period there is nothing left to wait for; never hand
	# sleep() a negative value.

	$sleep_period = $collection_period - $loop_time;
	if ($sleep_period < 0) { $sleep_period = 0; }
	sleep ($sleep_period);
}
