INN commit: trunk (3 files)
INN Commit
rra at isc.org
Fri Aug 22 21:38:40 UTC 2014
Date: Friday, August 22, 2014 @ 14:38:40
Author: iulius
Revision: 9652
pullnews: remove headers matching (or not) a given regexp
Extend the -m flag so that it can also remove headers that match (or do not match) a given regexp.
Thanks to Geraint Edwards for the patch.
Modified:
trunk/doc/pod/news.pod
trunk/doc/pod/pullnews.pod
trunk/frontends/pullnews.in
-----------------------+
doc/pod/news.pod | 6 ++++++
doc/pod/pullnews.pod | 5 ++++-
frontends/pullnews.in | 34 ++++++++++++++++++++++++----------
3 files changed, 34 insertions(+), 11 deletions(-)
Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod 2014-08-09 10:32:07 UTC (rev 9651)
+++ doc/pod/news.pod 2014-08-22 21:38:40 UTC (rev 9652)
@@ -183,6 +183,12 @@
not all processes die within timeout). Thanks to Lauri Tirkkonen for
the patch.
+=item *
+
+Several improvements have been contributed to B<pullnews> by Geraint
+Edwards: the B<-m> flag now allows removing headers that match (or do
+not match) a given regexp.
+
=back
=head1 Changes in 2.5.4
Modified: doc/pod/pullnews.pod
===================================================================
--- doc/pod/pullnews.pod 2014-08-09 10:32:07 UTC (rev 9651)
+++ doc/pod/pullnews.pod 2014-08-22 21:38:40 UTC (rev 9652)
@@ -115,10 +115,13 @@
whitespace-separated tuples (each tuple being a colon-separated header and
regular expression). For instance:
- -m "Hdr1:regexp1 !Hdr2:regexp2"
+ -m "Hdr1:regexp1 !Hdr2:regexp2 #Hdr3:regexp3 !#Hdr4:regexp4"
specifies that the article will be passed only if the C<Hdr1:> header
matches C<regexp1> and the C<Hdr2:> header does not match C<regexp2>.
+In addition, if the C<Hdr3:> header matches C<regexp3>, that header is
+removed; and if the C<Hdr4:> header does not match C<regexp4>, that
+header is removed.
=item B<-M> I<num>
Modified: frontends/pullnews.in
===================================================================
--- frontends/pullnews.in 2014-08-09 10:32:07 UTC (rev 9651)
+++ frontends/pullnews.in 2014-08-22 21:38:40 UTC (rev 9652)
@@ -160,10 +160,13 @@
-l logfile log progress/stats to logfile (default is stdout).
- -m 'Hdr1:regexp1 !Hdr2:regexp2 ...'
+ -m 'Hdr1:regexp1 !Hdr2:regexp2 #Hdr3:regexp3 !#Hdr4:regexp4 ...'
feed article only if:
- the Hdr1: header matches regexp1
- and the Hdr2: header does not match regexp2.
+ the Hdr1: header matches regexp1;
+ and the Hdr2: header does not match regexp2;
+ also, process the message thus:
+ if the Hdr3: header matches regexp3, remove that header;
+ if the Hdr4: header does not match regexp4, remove it.
-M num maximum number of articles (per group) to process before
bailing out.
@@ -730,22 +733,33 @@
my ($hdr_un, $val_un) = split(':', $unfolded_art_hdr, 2);
$val_un = '' if not defined $val_un;
$val_un =~ s/^\s*//;
+ my $remove_hdr = 0;
for my $tuple_match (@hdr_to_match) {
my ($hdr_m, $val_m) = split(':', $tuple_match, 2);
my $negate_h = ($hdr_m =~ s/^!//);
+ my $remove_h = ($hdr_m =~ s/^#//);
next if lc($hdr_un) ne lc($hdr_m);
- $m_found_hdrs{lc($hdr_m)} = 1;
+ $m_found_hdrs{lc($hdr_m)} = 1 if not $remove_h;
if ($negate_h) {
if ($val_un =~ /$val_m/i) {
print LOG "\tDEBUGGING $i\t-- $hdr_un [$val_un]\n" if $debug >= 2;
- $match_all_hdrs = 0;
+ if (not $remove_h) {
+ $match_all_hdrs = 0;
+ }
+ } elsif ($remove_h) {
+ $remove_hdr = 1;
}
} elsif (not $val_un =~ /$val_m/i) {
print LOG "\tDEBUGGING $i\t++ $hdr_un [$val_un]\n" if $debug >= 2;
- $match_all_hdrs = 0;
+ if (not $remove_h) {
+ $match_all_hdrs = 0;
+ }
+ } elsif ($remove_h) {
+ $remove_hdr = 1;
}
last if not $match_all_hdrs;
}
+ push @header_nums_to_go, $idx if $remove_hdr;
}
if (grep { $curr_hdr eq $_ } split(':', $skip_headers)) {
@@ -781,12 +795,12 @@
$headers = 0 if $article->[$idx] eq "\n";
}
- if (@hdr_to_match and (not $match_all_hdrs or @hdr_to_match != scalar(keys %m_found_hdrs))) {
- print LOG "\tDEBUGGING $i\thdr_skip_art $i\n" if $debug >= 2;
- $skip_due_to_hdrs = 1;
+ if (@hdr_to_match and (not $match_all_hdrs
+ or scalar(grep { ! /^!?#/ } @hdr_to_match) != keys %m_found_hdrs)) {
+ $skip_due_to_hdrs = 2;
}
while (@header_nums_to_go) {
- my $idx = pop @header_nums_to_go; # Start from last.
+ my $idx = pop @header_nums_to_go; # Start from last, so numbers are not affected.
my $cut = join("\n\t", splice(@{$article}, $idx, 1));
$tx_len -= length($cut);
print LOG "\tDEBUGGING $i\tcut1 $cut" if $debug >= 2;
More information about the inn-committers mailing list