INN commit: branches/2.5/control (docheckgroups.in)

INN Commit rra at isc.org
Tue Aug 16 14:33:19 UTC 2011


    Date: Tuesday, August 16, 2011 @ 07:33:18
  Author: iulius
Revision: 9344

docheckgroups:  no longer use awk

On a few systems, docheckgroups fails because of the use of an old
version of awk.  A few awk implementations have a limit on the size
of the input they can process, which then causes issues with large
newsgroups files.

Use Perl instead of awk for the time being.  It solves these issues.
Successfully tested on a system where docheckgroups was previously
failing.  Thanks to John F. Morse for the bug report.

The next step will be a total rewrite of docheckgroups.  It will
be much more maintainable as a Perl script.

Modified:
  branches/2.5/control/docheckgroups.in

------------------+
 docheckgroups.in |   59 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 16 deletions(-)

Modified: docheckgroups.in
===================================================================
--- docheckgroups.in	2011-08-16 14:28:07 UTC (rev 9343)
+++ docheckgroups.in	2011-08-16 14:33:18 UTC (rev 9344)
@@ -23,8 +23,10 @@
 fi
 
 ##  Copy the message without excluded newsgroups and append local newsgroups.
-cat | ${AWK} '{ if ($1 !~ /'"${2:-^#}"'/ && $1) { print $0 } }' >${T}/$$msg
-test -f ${LOCALGROUPS} && cat ${LOCALGROUPS} | ${EGREP} -v "^#" >>${T}/$$msg
+cat | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+          print $_ if ((scalar(@fields) > 0) && ($fields[0] !~ /'${2:-^#}'/)
+              && ($fields[0] !~ /^#/)); }' > ${T}/$$msg
+test -f ${LOCALGROUPS} && cat ${LOCALGROUPS} | ${EGREP} -v "^#" >> ${T}/$$msg
 
 ##  Exit if there is no matching newsgroup (otherwise docheckgroups is eager
 ##  to delete everything).
@@ -43,22 +45,31 @@
 PATS=`${SED} <${T}/$$msg \
         -e 's/[ 	].*//' -e 's/\..*//' \
         -e 's/^!//' -e '/^$/d' \
-        -e 's/^/^/' -e 's/$/[. 	]/' \
+        -e 's/^/^/' -e 's/$/[\.\w]/' \
     | ${SORT} -u \
-    | (tr '\012' '|' ; echo '' )\
+    | (tr '\012' '|' ; echo '' ) \
     | ${SED} -e 's/|$//'`
 
 ##  Check for missing and obsolete newsgroups in active.
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $1 } }' ${ACTIVE} | ${SORT} >${T}/$$active
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $1 } }' ${T}/$$msg | ${SORT} >${T}/$$newsgrps
+cat ${ACTIVE} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+    print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+        && ($fields[0] =~ /'${PATS}'/)); }' | ${SORT} > ${T}/$$active
+cat ${T}/$$msg | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+    print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+        && ($fields[0] =~ /'${PATS}'/)); }' | ${SORT} > ${T}/$$newsgrps
 
 comm -13 ${T}/$$active ${T}/$$newsgrps >${T}/$$missing
 comm -23 ${T}/$$active ${T}/$$newsgrps >${T}/$$remove
 
 ##  Check for proper moderation flags in active (we need to be careful
 ##  when dealing with wire-formatted articles manually fed from the spool).
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/) && ($0 ~ / m$/)) { print $1 } }' ${ACTIVE} | ${SORT} >${T}/$$amod.all
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/) && ($0 ~ / \(Moderated\)\r?$/)) { print $1 } }' ${T}/$$msg | ${SORT} >${T}/$$ng.mod
+cat ${ACTIVE} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+    print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+        && ($fields[0] =~ /'${PATS}'/) && ($_ =~ / m$/)); }' | ${SORT} > ${T}/$$amod.all
+cat ${T}/$$msg | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+    print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+        && ($fields[0] =~ /'${PATS}'/) && ($_ =~ / \(Moderated\)\r?$/)); }' \
+    | ${SORT} > ${T}/$$ng.mod
 
 comm -12 ${T}/$$missing ${T}/$$ng.mod >${T}/$$add.mod
 comm -23 ${T}/$$missing ${T}/$$ng.mod >${T}/$$add.unmod
@@ -72,23 +83,39 @@
 ##  Check for missing and obsolete newsgroups descriptions (possibly
 ##  in wire format).  A few sed implementations do not recognize
 ##  "[	]\+", so we use "	[	]*" instead.
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $0 } }' ${NEWSGROUPS} | ${SED} 's/	[	]*/	/' | ${SORT} >${T}/$$localdesc
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $0 } }' ${T}/$$msg | ${SED} 's/\r\?$//' \
-    | ${SED} 's/	[	]*/	/' | ${SORT} >${T}/$$newdesc
+cat ${NEWSGROUPS} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+    print $_ if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+        && ($fields[0] =~ /'${PATS}'/)); }' \
+    | ${SED} 's/	[	]*/	/' | ${SORT} > ${T}/$$localdesc
+cat ${T}/$$msg | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+    print $_ if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+        && ($fields[0] =~ /'${PATS}'/)); }' \
+    | ${SED} 's/\r\?$//' \
+    | ${SED} 's/	[	]*/	/' | ${SORT} > ${T}/$$newdesc
 
 comm -13 ${T}/$$localdesc ${T}/$$newdesc >${T}/$$missingdesc
 comm -23 ${T}/$$localdesc ${T}/$$newdesc >${T}/$$removedesc
 
 ##  If the -u flag is given, update the newsgroups descriptions.
 if [ "${UPDATEDESC}" = "true" ] ; then
-    ${AWK} '{ if ($1 !~ /'"${PATS}"'/) { print $0 } }' ${NEWSGROUPS} >${T}/$$updatednewsgroups
-    ${AWK} '{ if (($1 !~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $0 } }' ${NEWSGROUPS} >>${T}/$$updatednewsgroups
+    cat ${NEWSGROUPS} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+        print $_ if ((scalar(@fields) > 0) && ($fields[0] !~ /'${PATS}'/)); }' \
+        > ${T}/$$updatednewsgroups
+    cat ${NEWSGROUPS} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+        print $_ if ((scalar(@fields) > 0) && ($fields[0] !~ /'${1:-.}'/)
+        && ($fields[0] =~ /'${PATS}'/)); }' \
+        >> ${T}/$$updatednewsgroups
     cat ${T}/$$newdesc >>${T}/$$updatednewsgroups
     mv -f ${NEWSGROUPS} ${NEWSGROUPS}.old
     ${SORT} ${T}/$$updatednewsgroups | ${SED} 's/	[	]*/	/' \
-        | ${AWK} -F'\t' '{if (length($1) < 8) {print $1"\t\t\t"$2} \
-                    else {if (length($1) < 16) {print $1"\t\t"$2} \
-                    else {print $1"\t"$2}}}' >${NEWSGROUPS}
+        | ${PERL} -e 'while (<STDIN>) { my @fields = split("\t", $_, 2);
+            next if (scalar(@fields) == 0);
+            my $length = length("$fields[0]");
+            my $desc;
+            if (scalar(@fields) == 2) { $desc = "$fields[1]"; } else { $desc = ""; }
+            if ($length < 8) { print $fields[0]."\t\t\t".$desc; }
+            elsif ($length < 16) { print $fields[0]."\t\t".$desc; }
+            else { print $fields[0]."\t".$desc; } }' > ${NEWSGROUPS}
     chmod 0664 ${NEWSGROUPS} ${NEWSGROUPS}.old
 fi
 




More information about the inn-committers mailing list