#To unpack, delete all lines before this and feed to /bin/sh
echo news_dups 1>&2
sed -e 's/^X//' >news_dups <<'END'
X#! perl
X#
X# $Id: news_dups,v 1.1 1992/04/08 17:36:18 wengland Exp wengland $
X#
X# $Log: news_dups,v $
X# Revision 1.1 1992/04/08 17:36:18 wengland
X# Initial revision
X#
X#
X#
X# When this file is started by a shell instead of perl, the shell runs the
X# exec below and hands the script to perl.  Perl itself only compiles the
X# string: it is single-quoted so that perl never tries to interpolate
X# shell-only variables into it.
Xeval 'exec perl -S $0 ${1+"$@"}'
X    if $running_under_some_shell;
X
X## C-NEWS Duplicate Article Reporting program.
X##
X
X##
X #  (c) Copyright 1992, Stephen Software Systems, Inc.
X #
X #  This software is furnished under the terms of the GNU
X #  public license and is freely distributable.
X #
X #  This software is supplied FREE of charge and comes with
X #  no warrenties expressed or implied.  The author is not
X #  responsible for any damages, monetary or otherwise.
X #
X #  USE AT YOUR OWN RISK.
X##
X
X##
X #  This program reads the news history and recent log files
X #  to determine where duplicate articles are coming from
X #  and what groups the duplicate articles are in.
X #
X #  This program is only useful on systems running
X #  C-News that have the unix grep utility.
X #
X #  Output: one summary line giving the percentage of duplicate
X #  log entries whose article id was not found in the part of
X #  the history file that was read, followed by one line per
X #  "site newsgroup" pair with its duplicate count.
X #
X #  This program is really just a quick hack to get the
X #  job done.  Feel free to improve/fix it.  :-)
X##
X
X    $news_dir = '/usr/lib/news';
X    $history  = "$news_dir/history";
X
X    ## Read in the last 6% of the History file.
X     #  This number may need to change on different
X     #  systems.
X     #
X    $skip_factor = 1-.06;
X
X    ## Only "site newsgroup" pairs counted more than this many
X     #  times are printed by report_it.
X     #
X    $report_above = 5;
X
X    ## Position ourselves at the tail of the history file directly
X     #  instead of piping through "tail +Nb": that option syntax is
X     #  obsolete and rejected by current tail implementations.
X     #
X    open(HISTORY, "<$history") ||
X        die "Cannot open $history: $!\n";
X
X    $skip_bytes = int((-s HISTORY) * $skip_factor);
X
X    seek(HISTORY, $skip_bytes, 0) ||
X        die "Cannot seek in $history: $!\n";
X
X    open(DUPLICATES, "grep duplicate $news_dir/log*|") ||
X        die "Could not open duplicate pipe: $!\n";
X
X    ## Build an associative array that will let us find the
X     #  news groups a particular article was filed in.
X     #
X
X    # The seek almost certainly landed in the middle of a line,
X    # so throw away the first (partial) history line.
X    $_ = <HISTORY> if $skip_bytes > 0;
X
X    %newsarts = ();
X    $n = $dead = 0;
X    while (<HISTORY>) {
X
X# Debug #
X#       if ($n%4096 == 0){
X#           $y = $x;
X#           $x = $n -$dead;
X#           $y = $x -$y;
X#           print "Dead = $dead, Total = $n $x $y\n";
X#       }
X
X        $n++;
X
X        # Fields: article id, date field, then one "group/number"
X        # entry per newsgroup.  $date is not used further.
X        ($art_id, $date, @newsgroups) = split(/\s+/);
X
X        # Lines without any newsgroup entry cannot tell us anything.
X        if (!@newsgroups) {
X            $dead++;
X            next;
X        }
X
X        # Store ids without the surrounding angle brackets.
X        $art_id =~ tr/<>//d;
X
X        # Strip the "/number" part, leaving only the group names.
X        foreach $ngrp (@newsgroups) {
X            $ngrp =~ s/\/.+$//;
X        }
X
X        # Keep the groups as one '|' separated string per article.
X        $newsarts{$art_id} = join('|', @newsgroups);
X    }
X    close(HISTORY);
X
X
X    ## Match duplicate articles with history.
X     #
X    $n = $no_history = 0;
X    while (<DUPLICATES>) {
X        $n++;
X
X        # Only $site and $art_id are used; the other fields are
X        # named just to keep the positions readable.
X        ($month, $day, $time, $site, $f_ok, $art_id) = split(/\s+/);
X        $art_id =~ tr/<>//d;
X
X        $newsgroups = $newsarts{$art_id};
X        if ($newsgroups) {
X            # Count the duplicate once for every group it was filed in.
X            foreach $ng (split(/\|/, $newsgroups)) {
X                $duplicate_array{"$site $ng"}++;
X            }
X        } else {
X            $no_history++;
X        }
X    }
X    # grep exits non-zero when nothing matched, so the close status
X    # is deliberately not treated as an error.
X    close(DUPLICATES);
X
X    # Guard the division: with no duplicate lines at all $n is 0.
X    $pcnt_nohist = ($n ? int($no_history/$n*100) : 0).'%';
Xprint "Not enough history selected for $pcnt_nohist of duplicate articles.\n";
X
X    &report_it('STDOUT', %duplicate_array);
X
X
X## That's all, Folks! ##
X
X##
X #  report_it(FILEHANDLE_NAME, %counts)
X #
X #  Prints the entries of %counts to the named filehandle, sorted
X #  by key, one "key count" line per entry.  Only entries whose
X #  count is greater than $report_above are printed.  Prints
X #  "Null record." when %counts has no entries at all, and always
X #  finishes with a newline.
X##
Xsub report_it {
X    local($fh, %list) = @_;
X    local($lines) = 0;
X    local($key);
X
X    foreach $key (sort keys %list) {
X        next unless length($list{$key});
X        $lines++;
X        printf($fh "%-35s %s\n", $key, $list{$key})
X            if $list{$key} > $report_above;
X    }
X
X    print $fh "Null record." if !$lines;
X    print $fh "\n";
X}
END