INN commit: branches/2.5 (4 files)

INN Commit rra at isc.org
Sat Aug 6 20:17:28 UTC 2011


    Date: Saturday, August 6, 2011 @ 13:17:28
  Author: iulius
Revision: 9310

sendinpaths:  complete rewrite in Perl

Switch to a new sendinpaths program written in Perl.
Based on Mohan Kokal's work.

New -c, -d and -h flags, respectively to send a copy of the e-mail to the
newsmaster, to enable a debug mode, and to print usage information.

Modified:
  branches/2.5/backends/sendinpaths.in
  branches/2.5/doc/pod/news.pod
  branches/2.5/doc/pod/ninpaths.pod
  branches/2.5/doc/pod/sendinpaths.pod

-------------------------+
 backends/sendinpaths.in |  269 +++++++++++++++++++++++++++++++---------------
 doc/pod/news.pod        |   11 +
 doc/pod/ninpaths.pod    |   12 +-
 doc/pod/sendinpaths.pod |   16 ++
 4 files changed, 215 insertions(+), 93 deletions(-)

Modified: backends/sendinpaths.in
===================================================================
--- backends/sendinpaths.in	2011-08-06 19:34:10 UTC (rev 9309)
+++ backends/sendinpaths.in	2011-08-06 20:17:28 UTC (rev 9310)
@@ -1,97 +1,194 @@
-#!/bin/sh
-# fixscript will replace this line with code to load innshellvars
-#
-# Submit path statistics based on ninpaths.
-# $Id$
+#! /usr/bin/perl -w
+# fixscript will replace this line with code to load INN::Config
 
-# Assuming the ninpaths dump files are in ${MOST_LOGS}/path/inpaths.%d files.
+##  $Id$
+##
+##  Submit Path: statistics based on ninpaths.
+##
+##  A rewrite of the sendinpaths shell script in Perl, based on Mohan Kokal's
+##  initial work.  Improved by Julien Elie.
+##
+##  Prerequisites:
+##    The ninpaths dump files are in the ${INN::Config::pathlog}/path directory.
+##    The ninpaths dump filenames begin with "inpaths.".
+##    The ninpaths program is installed in ${INN::Config::pathbin}.
 
-cd ${MOST_LOGS}/path
-ME=`${NEWSBIN}/innconfval pathhost`
+use strict;
+use Getopt::Std;
 
-USAGE="Usage: sendinpaths [-n] [-k keep-days] [-r report-days] [address [address ...]]"
-NOMAIL=false
-MAILTO=""
-DEFAULTMAILTO="pathsurvey at top1000.org top1000 at anthologeek.net"
+my $pathhost    = "$INN::Config::pathhost";
+my $ninpaths    = "$INN::Config::pathbin/ninpaths";
+my $ninpathsdir = "$INN::Config::pathlog/path";
+my $alsoto      = "$INN::Config::newsmaster";
+my $sm          = "$INN::Config::mta";
 
-# Default to report up to 32 days (ideal for monthly statistics).  It works fine
-# for daily stats too because already processed dump files are deleted by default
-# (0 day of kept articles).
-REPORT=32
-KEEP=0
-NINPATHS_ARGS=""
+##  Default to report up to 32 days (ideal for monthly statistics).  It works fine
+##  for daily stats too because already processed dump files are deleted by default
+##  (0 day of kept articles).
+my $reportdays  = 32;
+my $keepdays    = 0;
 
-# Parse command-line arguments.
-while [ $# -gt 0 ]
-do
-  case "$1" in
-  -k)
-    case "$2" in
-    *[^0-9]*)
-      echo "Argument to -k flag must be an integer."
-      exit 1
-      ;;
-    esac
-    KEEP=$2
-    shift
-    ;;
-  -n)
-    NOMAIL=true
-    ;;
-  -r)
-    case "$2" in
-    *[^0-9]*)
-      echo "Argument to -r flag must be an integer."
-      exit 1 
-      ;;
-    esac
-    REPORT=$2
-    shift
-    ;;
-  -*)
-    echo $USAGE
-    exit 1
-    ;;
-  *)
-    MAILTO="${MAILTO} $1"
-    ;;
-  esac
-  shift
-done
+my @emailto     = ('pathsurvey at top1000.org', 'top1000 at anthologeek.net');
+my $emaildef    = join(' ', @emailto);
+my $debug       = 0;
 
-# Renice to give other processes priority, since this isn't too important.
-renice 20 -p $$ > /dev/null 2>&1
+my %opt;
 
-# Make report from (up to) $REPORT days of dumps.
-LOGS=`find . -name 'inpaths.*' ! -size 0 \( -mtime -${REPORT} -o -mtime ${REPORT} \) -print`
-if [ -z "$LOGS" ] ; then
-  echo "No data has been collected since the last run of this script!"
-  exit 1
-fi
+$0 =~ s!.*/!!;
 
-# Process dumps.
-for i in $LOGS
-do
-  ninpaths -u ${i} -r ${ME} > /dev/null 2>&1
-  if test $? -eq 0 ; then
-    NINPATHS_ARGS="${NINPATHS_ARGS} -u ${i}"
-  else
-    echo "Skipping unrecognized inpaths file ${i}"
-  fi
-done
 
-if [ -z "${NINPATHS_ARGS}" ] ; then
-  echo "No valid data has been collected since the last run of this script!"
-  exit 1
-fi
+sub usage {
+    print <<_end_;
+Usage:
+  $0 [-cdhn] [-k keep-days] [-r report-days] [address [address ...]]
 
-if [ "${NOMAIL}" = "true" ] ; then
-  ninpaths ${NINPATHS_ARGS} -r ${ME}
-else
-  ninpaths ${NINPATHS_ARGS} -r ${ME} |\
-    ${MAILCMD} -s "inpaths ${ME}" ${MAILTO:-$DEFAULTMAILTO}
-  # Remove dumps older than $KEEP days.
-  find . -name 'inpaths.*' \( -mtime +${KEEP} -o -mtime ${KEEP} \) -exec rm '{}' \;
-fi
+  If called without any arguments, reports are generated and auto
+  submitted to the inpaths accumulation site.
 
-exit 0
+  -c               sends a copy of the submitted mail to "$alsoto" (newsmaster's address)
+  -d               enables debug messages
+  -h               prints this help message
+  -k keep-days     specifies how many days to keep processed dump files
+  -n               nomail:  gathers stats, but does not auto-submit e-mails
+  -r report-days   specifies how many days of dump files should be processed
+
+  Current default submit address: [$emaildef].
+  The optional arguments [address [address ...]] may be used to override this default.
+_end_
+    exit(1);
+}
+
+
+sub main {
+  my (@files, @validfiles, @oldfiles, @appendargs, @cmd);
+  my $pid;
+  my $sendout = '';
+
+  getopts('cdhk:nr:', \%opt) || usage();
+  usage() if defined $opt{'h'};
+
+  # If we took an e-mail argument, override the default submission addresses.
+  @emailto = @ARGV if ($#ARGV >= 0);
+  push (@emailto, $alsoto) if defined $opt{'c'};
+
+  # Override default parameters.
+  $keepdays = $opt{'k'} if defined $opt{'k'};
+  usage() if $keepdays !~ /\d+/;
+
+  $reportdays = $opt{'r'} if defined $opt{'r'};
+  usage() if $reportdays !~ /\d+/;
+
+  # Set debug.
+  $debug = 1 if defined $opt{'d'};
+
+  # Scan the ninpaths directory.
+  opendir(my $DIR, $ninpathsdir) || die "cannot open $ninpathsdir:  $!\n";
+  @files = readdir($DIR);
+  closedir($DIR);
+
+  chdir($ninpathsdir) || die "cannot chdir $ninpathsdir:  $!\n";
+
+  foreach my $file (@files) {
+    # Process only inpaths files.
+    next if (! -f $file);
+    next if ($file !~ /^inpaths\./);
+
+    # Get a listing of all the wanted files to process.
+    # -s for nonzero size, -M for last modification age in days.
+    if (-s $file && int(-M $file) <= $reportdays) {
+      push @validfiles, $file;
+    }
+
+    # Now get the listing of all the files that will be removed.
+    if (int(-M $file) >= $keepdays) {
+      push @oldfiles, $file;
+    }
+  }
+
+  if ($#validfiles < 0) {
+    print "No data has been collected since the last run of this script!\n";
+    return;
+  }
+
+  # Process each dump file.
+  foreach my $file (@validfiles) {
+    @cmd = ($ninpaths, '-u', $file, '-r', $pathhost);
+
+    printf("exec'ing %s\n", join(' ', @cmd)) if $debug;
+
+    $pid = open(my $NINPATHS, '-|');
+    die "cannot fork:  $!\n" if $pid < 0;
+    if ($pid == 0) {
+      exec(@cmd) || die "cannot exec ninpaths:  $!\n";
+    } else {
+      while (<$NINPATHS>) {
+        ;
+      }
+      close($NINPATHS) || next;
+    }
+
+    if ($? == 0) {
+      push(@appendargs, ('-u', $file));
+    }
+  }
+
+  if ($#appendargs < 0) {
+    print "No valid data has been collected since the last run of this script!\n";
+    return;
+  }
+
+  # Prepare to send reports, and purge old entries from disk.
+  @cmd = ($ninpaths, @appendargs, '-r', $pathhost);
+
+  printf("exec'ing %s\n", join(' ', @cmd)) if $debug;
+
+  $pid = open(my $NINPATHS, '-|');
+  die "cannot fork:  $!\n" if $pid < 0;
+  if ($pid == 0) {
+    exec(@cmd) || die "cannot exec ninpaths:  $!\n";
+  } else {
+    while (<$NINPATHS>) {
+      $sendout .= $_;
+    }
+    close($NINPATHS) || die "execution of ninpaths failed:  $!\n";
+  }
+
+  if (defined $opt{'n'}) {
+    # We are not sending this report anywhere, but to stdout.
+    print $sendout;
+  } else {
+    if ($sm =~ /%s/) {
+      $sm = sprintf($sm, join(' ', @emailto));
+    } else {
+      $sm .= ' ' . join(' ', @emailto);
+    }
+
+    print "exec'ing $sm\n" if $debug;
+
+    # Fork and spawn the MTA without using the shell.
+    $pid = open(my $MTA, '|-');
+    die "cannot fork:  $!\n" if $pid < 0;
+    if ($pid == 0) {
+      exec(split(/\s+/, $sm)) || die "cannot exec $sm:  $!";
+    } else {
+
+    print $MTA 'To: ' . join(",\n\t", @emailto);
+    print $MTA "\nSubject: inpaths $pathhost\n\n";
+    print $MTA $sendout;
+    print $MTA "\n";
+    close($MTA) || die "execution of $sm failed:  $!\n";
+    }
+
+    # Remove old dumps.
+    foreach my $file (@oldfiles) {
+      print "removing $file\n" if $debug;
+      unlink($file);
+    }
+  }
+
+  return;
+}
+
+main();
+
+exit 0;

Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod	2011-08-06 19:34:10 UTC (rev 9309)
+++ doc/pod/news.pod	2011-08-06 20:17:28 UTC (rev 9310)
@@ -50,11 +50,16 @@
 
 It is now possible to properly generate daily statistics with B<sendinpaths>
 thanks to the new B<-k> and B<-r> flags that permit to control the interval
-of days for processing dump files.
+of days for processing dump files.  The new B<-c> flag permits to send a
+copy of the generated e-mail to the newsmaster.
 
-Also fixed an issue with statistics that could be missing for a couple of
-days when monthly sent.
+Also fixed an issue with statistics that could be missing or duplicated
+for a couple of days when monthly sent.
 
+The documentation has been updated and mentions a preferred daily run of
+B<sendinpaths>.  This script is a complete rewrite in Perl, and is based
+on Mohan Kokal's initial work.
+
 =item *
 
 B<cnfsheadconf> now properly recognizes continuation lines in

Modified: doc/pod/ninpaths.pod
===================================================================
--- doc/pod/ninpaths.pod	2011-08-06 19:34:10 UTC (rev 9309)
+++ doc/pod/ninpaths.pod	2011-08-06 20:17:28 UTC (rev 9310)
@@ -22,7 +22,7 @@
 Some central sites accumulate the Path: data from many news servers
 running this program or one like it, and then report statistics on
 the most frequently seen news servers in Usenet article Path: lines.
-The B<sendinpaths> shell script can be run daily to mail the accumulated
+The B<sendinpaths> script can be run daily to mail the accumulated
 statistics to such a site and remove the old dump files.
 
 You can get a working setup by doing the following:
@@ -46,6 +46,10 @@
 <pathbin> with the full path to your INN binaries directory, and
 <pathlog> with the full path to your INN log directory.
 
+Note that the naming convention of the generated inpaths dump files should
+not be changed.  B<sendinpaths> explicitly searches files whose name starts
+with C<inpaths.> in the <pathlog>/path directory.
+
 =item 3.
 
 Run the following command to start logging these statistics:
@@ -64,8 +68,10 @@
 the dumps, makes a report, sends the collected statistics, and deletes
 the old dumps.
 
-Note that you can manually generate a report without mailing it, and
-without deleting processed dump files, with C<sendinpaths -n>.
+Note that you can manually generate a report without mailing it, and without
+deleting processed dump files, with C<sendinpaths -n>.  Another useful
+command is C<sendinpaths -c> so as to receive a copy of the e-mail sent
+by B<sendinpaths> and therefore make sure that everything is properly set.
 
 =item 5.
 

Modified: doc/pod/sendinpaths.pod
===================================================================
--- doc/pod/sendinpaths.pod	2011-08-06 19:34:10 UTC (rev 9309)
+++ doc/pod/sendinpaths.pod	2011-08-06 20:17:28 UTC (rev 9310)
@@ -4,7 +4,7 @@
 
 =head1 SYNOPSIS
 
-B<sendinpaths> [B<-n>] [B<-k> I<keep-days>] [B<-r> I<report-days>]
+B<sendinpaths> [B<-cdhn>] [B<-k> I<keep-days>] [B<-r> I<report-days>]
 [I<address> [I<address> ...]]
 
 =head1 DESCRIPTION
@@ -27,6 +27,20 @@
 
 =over 4
 
+=item B<-c>
+
+When this flag is used, the report is also e-mailed, besides the default
+submission addresses or those given as command-line arguments, to the
+newsmaster's address set at configure time.
+
+=item B<-d>
+
+Enables debug messages.
+
+=item B<-h>
+
+Gives usage information.
+
 =item B<-k> I<keep-days>
 
 After having processed dump files, B<sendinpaths> removes those that are




More information about the inn-committers mailing list