INN commit: branches/2.5/control (docheckgroups.in)
INN Commit
rra at isc.org
Tue Aug 16 14:33:19 UTC 2011
Date: Tuesday, August 16, 2011 @ 07:33:18
Author: iulius
Revision: 9344
docheckgroups: no longer use awk
On a few systems, docheckgroups fails because of the use of an old
version of awk. A few awk implementations have a limit in the size
of the input they can process; it then causes issues with large
newsgroups files.
Use Perl instead of awk for the time being. It solves these issues.
Successfully tested on a system where docheckgroups was previously
failing. Thanks to John F. Morse for the bug report.
The next step will be a total rewrite of docheckgroups. It will
be much more maintainable as a Perl script.
Modified:
branches/2.5/control/docheckgroups.in
------------------+
docheckgroups.in | 59 ++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 43 insertions(+), 16 deletions(-)
Modified: docheckgroups.in
===================================================================
--- docheckgroups.in 2011-08-16 14:28:07 UTC (rev 9343)
+++ docheckgroups.in 2011-08-16 14:33:18 UTC (rev 9344)
@@ -23,8 +23,10 @@
fi
## Copy the message without excluded newsgroups and append local newsgroups.
-cat | ${AWK} '{ if ($1 !~ /'"${2:-^#}"'/ && $1) { print $0 } }' >${T}/$$msg
-test -f ${LOCALGROUPS} && cat ${LOCALGROUPS} | ${EGREP} -v "^#" >>${T}/$$msg
+cat | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $_ if ((scalar(@fields) > 0) && ($fields[0] !~ /'${2:-^#}'/)
+ && ($fields[0] !~ /^#/)); }' > ${T}/$$msg
+test -f ${LOCALGROUPS} && cat ${LOCALGROUPS} | ${EGREP} -v "^#" >> ${T}/$$msg
## Exit if there is no matching newsgroup (otherwise docheckgroups is eager
## to delete everything).
@@ -43,22 +45,31 @@
PATS=`${SED} <${T}/$$msg \
-e 's/[ ].*//' -e 's/\..*//' \
-e 's/^!//' -e '/^$/d' \
- -e 's/^/^/' -e 's/$/[. ]/' \
+ -e 's/^/^/' -e 's/$/[\.\w]/' \
| ${SORT} -u \
- | (tr '\012' '|' ; echo '' )\
+ | (tr '\012' '|' ; echo '' ) \
| ${SED} -e 's/|$//'`
## Check for missing and obsolete newsgroups in active.
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $1 } }' ${ACTIVE} | ${SORT} >${T}/$$active
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $1 } }' ${T}/$$msg | ${SORT} >${T}/$$newsgrps
+cat ${ACTIVE} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+ && ($fields[0] =~ /'${PATS}'/)); }' | ${SORT} > ${T}/$$active
+cat ${T}/$$msg | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+ && ($fields[0] =~ /'${PATS}'/)); }' | ${SORT} > ${T}/$$newsgrps
comm -13 ${T}/$$active ${T}/$$newsgrps >${T}/$$missing
comm -23 ${T}/$$active ${T}/$$newsgrps >${T}/$$remove
## Check for proper moderation flags in active (we need to be careful
## when dealing with wire-formatted articles manually fed from the spool).
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/) && ($0 ~ / m$/)) { print $1 } }' ${ACTIVE} | ${SORT} >${T}/$$amod.all
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/) && ($0 ~ / \(Moderated\)\r?$/)) { print $1 } }' ${T}/$$msg | ${SORT} >${T}/$$ng.mod
+cat ${ACTIVE} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+ && ($fields[0] =~ /'${PATS}'/) && ($_ =~ / m$/)); }' | ${SORT} > ${T}/$$amod.all
+cat ${T}/$$msg | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $fields[0]."\n" if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+ && ($fields[0] =~ /'${PATS}'/) && ($_ =~ / \(Moderated\)\r?$/)); }' \
+ | ${SORT} > ${T}/$$ng.mod
comm -12 ${T}/$$missing ${T}/$$ng.mod >${T}/$$add.mod
comm -23 ${T}/$$missing ${T}/$$ng.mod >${T}/$$add.unmod
@@ -72,23 +83,39 @@
## Check for missing and obsolete newsgroups descriptions (possibly
## in wire format). A few sed implementations do not recognize
## "[ ]\+", so we use " [ ]*" instead.
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $0 } }' ${NEWSGROUPS} | ${SED} 's/ [ ]*/ /' | ${SORT} >${T}/$$localdesc
-${AWK} '{ if (($1 ~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $0 } }' ${T}/$$msg | ${SED} 's/\r\?$//' \
- | ${SED} 's/ [ ]*/ /' | ${SORT} >${T}/$$newdesc
+cat ${NEWSGROUPS} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $_ if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+ && ($fields[0] =~ /'${PATS}'/)); }' \
+ | ${SED} 's/ [ ]*/ /' | ${SORT} > ${T}/$$localdesc
+cat ${T}/$$msg | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $_ if ((scalar(@fields) > 0) && ($fields[0] =~ /'${1:-.}'/)
+ && ($fields[0] =~ /'${PATS}'/)); }' \
+ | ${SED} 's/\r\?$//' \
+ | ${SED} 's/ [ ]*/ /' | ${SORT} > ${T}/$$newdesc
comm -13 ${T}/$$localdesc ${T}/$$newdesc >${T}/$$missingdesc
comm -23 ${T}/$$localdesc ${T}/$$newdesc >${T}/$$removedesc
## If the -u flag is given, update the newsgroups descriptions.
if [ "${UPDATEDESC}" = "true" ] ; then
- ${AWK} '{ if ($1 !~ /'"${PATS}"'/) { print $0 } }' ${NEWSGROUPS} >${T}/$$updatednewsgroups
- ${AWK} '{ if (($1 !~ /'"${1:-.}"'/) && ($1 ~ /'"${PATS}"'/)) { print $0 } }' ${NEWSGROUPS} >>${T}/$$updatednewsgroups
+ cat ${NEWSGROUPS} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $_ if ((scalar(@fields) > 0) && ($fields[0] !~ /'${PATS}'/)); }' \
+ > ${T}/$$updatednewsgroups
+ cat ${NEWSGROUPS} | ${PERL} -e 'while (<STDIN>) { my @fields = split();
+ print $_ if ((scalar(@fields) > 0) && ($fields[0] !~ /'${1:-.}'/)
+ && ($fields[0] =~ /'${PATS}'/)); }' \
+ >> ${T}/$$updatednewsgroups
cat ${T}/$$newdesc >>${T}/$$updatednewsgroups
mv -f ${NEWSGROUPS} ${NEWSGROUPS}.old
${SORT} ${T}/$$updatednewsgroups | ${SED} 's/ [ ]*/ /' \
- | ${AWK} -F'\t' '{if (length($1) < 8) {print $1"\t\t\t"$2} \
- else {if (length($1) < 16) {print $1"\t\t"$2} \
- else {print $1"\t"$2}}}' >${NEWSGROUPS}
+ | ${PERL} -e 'while (<STDIN>) { my @fields = split("\t", $_, 2);
+ next if (scalar(@fields) == 0);
+ my $length = length("$fields[0]");
+ my $desc;
+ if (scalar(@fields) == 2) { $desc = "$fields[1]"; } else { $desc = ""; }
+ if ($length < 8) { print $fields[0]."\t\t\t".$desc; }
+ elsif ($length < 16) { print $fields[0]."\t\t".$desc; }
+ else { print $fields[0]."\t".$desc; } }' > ${NEWSGROUPS}
chmod 0664 ${NEWSGROUPS} ${NEWSGROUPS}.old
fi
More information about the inn-committers
mailing list