#To unpack, delete all lines before this and feed to /bin/sh
echo news_dups 1>&2
sed -e 's/^X//' >news_dups <<'END'
X#! perl
X#
X# $Id: news_dups,v 1.1 1992/04/08 17:36:18 wengland Exp wengland $
X#
X# $Log: news_dups,v $
X# Revision 1.1  1992/04/08  17:36:18  wengland
X# Initial revision
X#
X#
X#
X# Shell/Perl bootstrap: when this file is handed to a shell instead of
X# perl, the shell runs the eval and re-launches the script under perl
X# (-S makes perl look the script up via PATH).  Perl itself skips the
X# eval because $running_under_some_shell is never set.  The string is
X# single-quoted so that the shell, not perl, expands it, and ${1+"$@"}
X# passes the original arguments through without re-splitting them.
Xeval 'exec perl -S $0 ${1+"$@"}'
X	if $running_under_some_shell;
X
X## C-NEWS Duplicate Article Reporting program.
X##
X
X##
X #  (c) Copyright 1992, Stephen Software Systems, Inc.
X #
X #  This software is furnished under the terms of the GNU
X #  public license and is freely distributable.
X #
X #  This software is supplied FREE of charge and comes with
X #  no warranties expressed or implied.  The author is not
X #  responsible for any damages, monetary or otherwise.
X #
X #               USE AT YOUR OWN RISK.
X##
X
X##
X # This program reads the news history and recent log files
X # to determine where duplicate articles are coming from
X # and what groups the duplicate articles are in.
X #
X # This program is only useful on systems running
X # C-News that have the unix tail and grep utilities.
X # 
X # This program is really just a quick hack to get the
X # job done.  Feel free to improve/fix it.  :-)
X
X ## Directory holding the news history file and the log files.
X $news_dir = '/usr/lib/news';
X
X ## Read in the last 6% of the History file.
X  # This number may need to change on different
X  # systems.
X  #
X $skip_factor = 1-.06;
X
X ## A piped open only tells us whether the child process could be
X  # started, not whether tail was able to read its file, so check the
X  # history file up front instead of reporting against an empty history.
X  #
X die "Cannot read $news_dir/history: $!"
X	unless -r "$news_dir/history";
X
X ## Offset, in 512-byte blocks, at which tail starts copying the file.
X  # NOTE(review): "tail +Nb" is the historical offset syntax; newer tail
X  # implementations may need "tail -c +BYTES" instead -- confirm on the
X  # target system.
X  #
X $block_skip = int((-s "$news_dir/history")/512*$skip_factor)."b";
X
X open(HISTORY, "tail +$block_skip $news_dir/history|")||
X	die "History pipe open failure: $!";
X
X ## Every line containing "duplicate" in any file matching log*.
X open(DUPLICATES, "grep duplicate $news_dir/log*|")||
X	die "Could not open duplicate pipe: $!";
X
X ## Build an associative array that will let us find the
X  # news groups a particular article was filed in.
X  #   key:   the first field of a history line with its <> removed
X  #   value: the group names of that line joined with "|"
X  #
X
X $_= <HISTORY>; # Throw away first history line from tail; the block
X                # offset may have landed in the middle of a line.
X
X %newsarts = ();		# the lookup table itself (a hash, not a scalar)
X $n = $dead = 0;		# lines read / lines without any group field
X while(<HISTORY>){
X
X# Debug #
X#      if ($n%4096 == 0){
X#	  $y = $x;
X#	  $x = $n -$dead;
X#	  $y = $x -$y;
X#	  print "Dead = $dead, Total = $n $x $y\n";
X#	  }
X
X     $n++;
X
X     ($art_id, $date, @newsgroups) = split(/\s+/);
X
X     ## A line with nothing after the second field names no group,
X      # so it cannot be mapped; count it and move on.
X     if (!@newsgroups){
X	$dead++;
X	next;
X     }
X
X     $art_id =~ s/(\<|\>)//g;
X
X     ## Keep only the part of each field before the first "/"
X      # (presumably "group/article-number" -- confirm against the
X      # local history format).  Every field is processed, including
X      # one that happens to be false in perl, such as "0".
X     foreach $ngrp (@newsgroups){
X	$ngrp =~ s/\/.+$//;
X     }
X     $newsarts{$art_id} = join('|', @newsgroups);
X }
X close(HISTORY);	# everything has been read; reap the tail process
X
X
X  ## Match duplicate articles with history.
X   # For every line delivered by the DUPLICATES pipe, look its
X   # message-id up in %newsarts and bump one counter per
X   # "site  newsgroup" pair.  Lines whose message-id is not in the
X   # table are counted in $no_history instead.
X   #
X    $n=$no_history = 0;
X    while(<DUPLICATES>){
X	$n++;
X	($month,$day,$time, $site, $f_ok, $art_id) = split( /\s+/);
X	# Strip the <> so the id has the same form as the %newsarts keys.
X	$art_id =~ s/(\<|\>)//g;
X	$newsgroups = $newsarts{$art_id};
X	if($newsgroups){
X	    foreach $ng( split(/\|/,$newsgroups)){
X		$duplicate_array{"$site  $ng"}++;
X	    }
X	}else{
X	    $no_history++;
X	}
X    }
X    close(DUPLICATES);	# everything has been read; reap the grep process
X
X  ## Share of the duplicate lines whose message-id was not found in
X   # the loaded part of the history.  When no duplicate lines were read
X   # at all $n is 0, so report 0% rather than dividing by zero.
X   #
X  $pcnt_nohist = ($n ? int($no_history/$n*100) : 0).'%';
Xprint "Not enough history selected for $pcnt_nohist of duplicate articles.\n";
X
X  &report_it(STDOUT, %duplicate_array);
X
X
X## That's all Folks! ##
X
X##
X # report_it(FILEHANDLE, %tally)
X #
X # Writes the entries of %tally to FILEHANDLE in sorted key order, one
X # "key value" line per entry, but only for values greater than 5.
X # "Null record." is written when no entry has a non-empty value at all.
X # The report always ends with a newline.
X##
Xsub report_it{
X    local($fh, %list) = @_;
X    local($records)   = 0;	# entries that carry a non-empty value
X    local($count);
X
X    foreach $label (sort keys %list){
X        $count = $list{$label};
X        next unless length($count);
X
X        $records++;
X        next unless $count > 5;		# small counts are not reported
X
X        printf($fh "%-35s %s\n", $label, $count);
X    }
X
X    print $fh "Null record." unless $records;
X    print $fh "\n";
X}
END
