Article 8659 of comp.lang.perl:
Xref: feenix.metronet.com alt.sources:2388 comp.lang.perl:8659 news.software.b:2867
Newsgroups: alt.sources,comp.lang.perl,news.software.b
Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!gatech!swrinde!sgiblab!a2i!dhesi
From: dhesi@rahul.net (Rahul Dhesi)
Subject: update 'min' and 'max' fields in 'active' file
Message-ID: <CHLwnE.26B@rahul.net>
Followup-To: alt.sources.d,comp.lang.perl,news.software.b
Sender: news@rahul.net (Usenet News)
Nntp-Posting-Host: bolero
Organization: a2i network
Date: Mon, 6 Dec 1993 09:21:14 GMT
Lines: 254

Checksum:  204823036 (verify with 'brik')
Submitter: Rahul Dhesi <dhesi@rahul.net>
Archive-name: perl-source/dhesi/updminmax

#! /usr/bin/perl

# (C) Copyright 1993 Rahul Dhesi, All rights reserved.
# Permission for copying and creation of derivative works is granted,
# provided this copyright notice is preserved, to anybody who
# does not discriminate against the copyright owner.

# $Source: /news/bin/RCS/updminmax,v $
# $Id: updminmax,v 1.17 1993/12/06 09:12:31 news Exp $
#
# Update min and max fields in active file.  Based on ideas in
# two shell scripts that come with C-News.  This program
# gets newsgroup names from the active file, gets the article numbers
# from the spool directory for each newsgroup, and corrects the min
# and max fields in the active file.  It is useful for keeping the
# min field updated.  It is also useful for correcting errors in the
# max field, which have been known to occur occasionally.  A field length
# of 5 digits for the min field and 10 digits for the max field
# is forced -- this can be changed in the code by revising two
# sprintf statements.  The entire news system is locked while this program
# runs.  At the a2i network this program completes in about 8 to 10
# minutes.  Locking is done by creating a symbolic link.
#
# This script is offered "as is", though it appears to work under
# SunOS 4.1.1 running C-News derived from the May 1992 release and
# using perl 4.019.

# Name used as a prefix in every diagnostic and in the backup file name.
$myname = "updminmax";

# Site configuration -- adjust these two paths for the local news setup.
$NEWSCTL = '/news/lib';    # news lib directory (where the active file resides)
$NEWSARTS = '/news/news';  # news spool (where articles reside)

# The following will be changed only rarely.
$ACTIVE = 'active';                  # name of active file, relative to $NEWSCTL
$BACKUP = "$ACTIVE.before.$myname";  # previous active file is kept under this name
$INF = (1 << 30);                    # sentinel: must exceed every article number
$ona2i = 0;                          # true only on the a2i network (enables jab backup)

# RCS identification, printed at the end of the -h help text.
$RCSHEADER = '$Source: /news/bin/RCS/updminmax,v $' . "\n" .
	'$Id: updminmax,v 1.17 1993/12/06 09:12:31 news Exp $';

# The program takes switches only; it never reads positional arguments,
# so the usage line must not advertise any.
$usage = "usage: $myname [-vtxnmCD] (or -h for help)";

# Parse command-line switches, but only when the first argument looks
# like one, so getopts.pl is not loaded for a plain invocation.
# Getopts returns false after complaining about an unknown switch; stop
# right there instead of going on to lock news and rewrite the active file
# with options the operator did not intend.
if ($ARGV[0] =~ "^-.+" ) {
   require "getopts.pl";
   &Getopts("vtxhnmCD") || die "$usage\n";
}

# Map the raw $opt_* switches onto descriptive names used by the rest
# of the program.
$debug = $opt_x;                         # -x: maintainer debugging output
$trace = $opt_t;                         # -t: show what would be done only
$verbose = $debug || $trace || $opt_v;   # -v, implied by -x and -t

$nolock = $opt_n;                        # -n: do not lock the news system
$clean = $opt_C;                         # -C: drop groups lacking a spool dir
$diff = $opt_D;                          # -D: save a diff of old vs new active
$summarize_max = $opt_m;                 # -m: report every changed max field

# -h: print the help text and do nothing else.
if ($opt_h) {
   &givehelp();
   exit(0);
}

# All file names below ($ACTIVE, the new copy, the backup) are relative
# to the news lib directory.
chdir($NEWSCTL) || die "$myname: error: can't cd to $NEWSCTL: $!\n";
open(ACTIVE, $ACTIVE) || 
   die "$myname: error: can't read $ACTIVE file: $!\n";

# The replacement active file is built under a per-process name and only
# renamed into place by &main once it has been validated.  Nothing is
# written to disk in trace mode.
if (! $trace) {
   $newactive = "$ACTIVE.new.$$"; unlink($newactive);
   (-f $newactive) && die "$myname: error: can't unlink $newactive\n";
   open(NEWACTIVE, ">$newactive") ||
      die "$myname: error: can't write to $newactive\n";
}

# Unbuffer stdout so progress output appears as it is produced.
if ($verbose) {
   $| = 1;
}

# Trap signals so we can unlock any lock on a signal.  HUP is included:
# the original list named TERM twice and left HUP untrapped, so a hangup
# would have killed the program with the news system still locked.
for $signame ('HUP', 'INT', 'QUIT', 'TERM', 'PIPE') {
   $SIG{$signame} = 'signal';
}

# lock news system -- will wait forever for lock
&newslock() unless $nolock;

$trace && print "$myname: TRACE: no writes to disk\n";

# Run the real work inside eval so that a die in &main still reaches
# the unlock below; the error is re-raised afterwards.
eval '&main';
&newsunlock unless $nolock;
$@ && die $@;
exit;

# Rebuild the active file with min/max fields recomputed from the spool.
#
# Reads group lines from the already-open ACTIVE handle and, for each
# group, scans its spool directory under $NEWSARTS for all-numeric file
# names.  Unless $trace is set, a corrected line is written to the
# already-open NEWACTIVE handle and the finished file is renamed over
# $ACTIVE after the old one has been linked to $BACKUP.
#
# Globals read: $NEWSARTS, $ACTIVE, $BACKUP, $newactive, $INF, $myname,
# and the option flags $trace, $verbose, $debug, $clean, $diff,
# $summarize_max, $ona2i.
#
# Dies on a malformed active line, on a failed write or rename, and when
# the new file's line count does not match what was expected.
sub main {
   local($group, $max, $min, $fourth, $dir, $entry, $newmax, $newmin);
   local($oldlen, $newlen);
   local($omitted) = 0;       # groups dropped because of -C

   while (<ACTIVE>) {
      ($group, $max, $min, $fourth) = split(' ', $_);
      (! $fourth) && die "$myname: fatal: less than 4 fields in $ACTIVE\n";
      ($dir = $group) =~ s#\.#/#g;      # map ng name to directory name
      if (opendir(DIR, "$NEWSARTS/$dir")) {
         $newmax = 0;
         $newmin = $INF;                # infinity

         # Find min and max by reading directory entries.  The explicit
         # defined() keeps an entry named "0" from ending the scan early.
         while (defined($entry = readdir(DIR))) {
            if ($entry =~ /^\d+$/) {    # if all numeric
               if ($entry < $newmin) {
                  $newmin = $entry;
               }
               if ($entry > $newmax) {
                  $newmax = $entry;
               }
            } elsif ($debug) {
               if ($entry ne '.' && $entry ne '..' && $entry ne '.overview') {
                  print "non-numeric: $NEWSARTS/$dir/$entry\n";
               }
            }
         }
         closedir(DIR);

         if ($newmin == $INF) {
            # no articles found
            $newmax = $max;             # old value of max
            $newmin = $max + 1;         # .. and min is one higher
         }
         # don't let max get smaller.
         if ($newmax < $max) {
            $newmax = $max;
         }
         # Fixed field widths: 5 digits for min, 10 digits for max.
         $newmin = sprintf("%05u", $newmin);
         $newmax = sprintf("%010u", $newmax);
         # NEWACTIVE is never opened in trace mode, so don't write to it.
         $trace || print NEWACTIVE "$group $newmax $newmin $fourth\n";
         $verbose && print "$group $newmax $newmin $fourth\n";
         if ($summarize_max && $max != $newmax) {
            printf "ng %20s max $max -> $newmax, min $min -> $newmin\n",
               $group;
         }
      } else {
         if ($clean) {
            # -C: drop the group, and remember that we did so the
            # line-count check below knows how many lines to expect.
            $omitted++;
            $verbose && print "$myname: omitting $group: no spool directory\n";
         } else {
            # Spool directory unreadable: pass the line through unchanged.
            $trace || print NEWACTIVE "$group $max $min $fourth\n";
            $verbose && print "$group $max $min $fourth\n";
         }
      }
   }
   close(ACTIVE);

   if (! $trace) {
      # Buffered write errors (e.g. a full disk) surface at close time.
      close(NEWACTIVE) ||
         die "$myname: error: can't finish writing $newactive: $!\n";

      if ($diff) {
         system "diff -u $ACTIVE $newactive > $ACTIVE.diff 2>&1";
      }
      # Sanity check before installing: the new file must have one line
      # per old line, minus any groups deliberately omitted by -C.
      $oldlen = `wc -l < '$ACTIVE'`;  chop $oldlen;
      $newlen = `wc -l < $newactive`; chop $newlen;
      if ($oldlen == $newlen + $omitted) {
         # a2i use only -- preserve a numbered copy of the active file
         $ona2i && system("/news/bin/jab -P $ACTIVE");
         unlink $BACKUP;
         link($ACTIVE, $BACKUP) ||
            warn "$myname: could not link $ACTIVE to $BACKUP: $!\n";
         rename($newactive, $ACTIVE) ||
            die "$myname: error: rename $newactive $ACTIVE: $!\n";
      } else {
         die "$myname: $newactive is bad; aborting, leaving $ACTIVE unchanged\n";
      }
   }
}

## sub usage_error {
##    local($msg) = @_;
##    if ($msg) {
##       die "$msg\n";
##    } else {
##       die "$usage\n";
##    }
## }

# Print the help text to stdout: the usage line, a description of every
# switch, and the RCS identification.  Interpolates the globals $usage,
# $ACTIVE and $RCSHEADER.
# (Paging the text through local/page.pl was tried once and is disabled.)
sub givehelp {
   local($helptext) = <<EOF;
$usage

This program updates min and max fields in the $ACTIVE file.  It locks the
entire news system while it runs.

   -C		Clean $ACTIVE file by omitting groups whose spool
		directories could not be accessed.
   -m		When a max field is changed, print min and max changes to
		stdout.
   -D		Save a diff of original and updated active file
		into $ACTIVE.diff.
   -v		Be verbose.
   -t		Trace only -- show what would be done but don't do it.
		The news system will not be locked.
   -x		Enable debugging -- for program maintainers.
   -n		Don't lock news system (risky to use this except for testing)

$RCSHEADER
EOF
   print $helptext;
}

# Lock the news system putting into the global variable @UNLOCK_LIST
# a list of filenames to be deleted when &newsunlock is called.
#
# The lock is the symbolic link $NEWSCTL/LOCK pointing at a per-process
# temporary file $NEWSCTL/L.<pid> that contains our process id.  If the
# link already exists the call retries every 30 seconds, forever.
# In trace mode nothing is created; the intended lock is only printed.
#
# Dies if @UNLOCK_LIST is already populated (lock taken twice) or if the
# temporary file cannot be created and written.
sub newslock {
   $lock = "$NEWSCTL/LOCK";
   local($ltemp) = "$NEWSCTL/L.$$";
   @UNLOCK_LIST && die "$myname: newslock: internal error: " .
      "nonempty UNLOCK_LIST: @UNLOCK_LIST\n";

   @UNLOCK_LIST = ($ltemp);		# newsunlock will delete these

   if ($trace) {
      print "newslock: trace: would lock: $lock -> $ltemp\n";
      return;
   }

   open(LTEMP, ">$ltemp") || die "$myname: error: news lock failed\n";
   # The print is parenthesized on purpose: written as
   #    print LTEMP "$$" || die ...
   # the || applies to the (always true) string and a failed write goes
   # unnoticed.  close is checked too, since buffered output may only
   # fail when it is flushed.
   if (! (print LTEMP "$$") || ! close(LTEMP)) {
      unlink($ltemp);			# don't leave a half-written temp file
      die "$myname: error: news lock failed\n";
   }

   # symlink fails while somebody else holds the lock; keep trying.
   for ( ; ; ) {
      if (symlink($ltemp, $lock)) {
	 push(@UNLOCK_LIST, $lock);
	 $debug && print "$myname: debug: locked: $lock -> $ltemp\n";
	 return;
      }
      $verbose && print "$myname: debug: waiting 30 seconds for lock\n";
      sleep(30);
   }
}

# Release the news lock by deleting every file recorded in the global
# @UNLOCK_LIST.  In trace mode nothing is deleted; with $verbose set the
# action (or the fact that there is nothing to do) is reported on stdout.
sub newsunlock {
   unless (@UNLOCK_LIST) {
      print "newsunlock: empty UNLOCK_LIST\n" if $verbose;
      return;
   }
   print "newsunlock: unlink @UNLOCK_LIST\n" if $verbose;
   unlink(@UNLOCK_LIST) unless $trace;
}

# signal handler
# -- newsunlock, remove the partly written new active file, then exit
#
# Installed by name in %SIG.  Exits with a nonzero status so that
# whoever started the program can tell the run was interrupted rather
# than completed.
sub signal {
   &newsunlock();
   # $newactive is never set in trace mode; don't unlink an empty name.
   ($newactive ne '') && unlink($newactive);
   exit(1);
}

##### == END ==


