INN commit: trunk (3 files)

INN Commit rra at isc.org
Fri Aug 22 21:38:40 UTC 2014


    Date: Friday, August 22, 2014 @ 14:38:40
  Author: iulius
Revision: 9652

pullnews:  remove headers matching (or not) a given regexp

Enable the -m flag to remove headers matching (or not) a given regexp.

Thanks to Geraint Edwards for the patch.

Modified:
  trunk/doc/pod/news.pod
  trunk/doc/pod/pullnews.pod
  trunk/frontends/pullnews.in

-----------------------+
 doc/pod/news.pod      |    6 ++++++
 doc/pod/pullnews.pod  |    5 ++++-
 frontends/pullnews.in |   34 ++++++++++++++++++++++++----------
 3 files changed, 34 insertions(+), 11 deletions(-)

Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod	2014-08-09 10:32:07 UTC (rev 9651)
+++ doc/pod/news.pod	2014-08-22 21:38:40 UTC (rev 9652)
@@ -183,6 +183,12 @@
 not all processes die within timeout).  Thanks to Lauri Tirkkonen for
 the patch.
 
+=item *
+
+Several improvements have been contributed to B<pullnews> by Geraint
+Edwards:  the B<-m> flag now allows removing headers matching (or not)
+a given regexp.
+
 =back
 
 =head1 Changes in 2.5.4

Modified: doc/pod/pullnews.pod
===================================================================
--- doc/pod/pullnews.pod	2014-08-09 10:32:07 UTC (rev 9651)
+++ doc/pod/pullnews.pod	2014-08-22 21:38:40 UTC (rev 9652)
@@ -115,10 +115,13 @@
 whitespace-separated tuples (each tuple being a colon-separated header and
 regular expression).  For instance:
 
-    -m "Hdr1:regexp1 !Hdr2:regexp2"
+    -m "Hdr1:regexp1 !Hdr2:regexp2 #Hdr3:regexp3 !#Hdr4:regexp4"
 
 specifies that the article will be passed only if the C<Hdr1:> header
 matches C<regexp1> and the C<Hdr2:> header does not match C<regexp2>.
+In addition, if the C<Hdr3:> header matches C<regexp3>, that header is
+removed; and if the C<Hdr4:> header does not match C<regexp4>, that
+header is removed.
 
 =item B<-M> I<num>
 

Modified: frontends/pullnews.in
===================================================================
--- frontends/pullnews.in	2014-08-09 10:32:07 UTC (rev 9651)
+++ frontends/pullnews.in	2014-08-22 21:38:40 UTC (rev 9652)
@@ -160,10 +160,13 @@
 
   -l logfile    log progress/stats to logfile (default is stdout).
 
-  -m 'Hdr1:regexp1 !Hdr2:regexp2 ...'
+  -m 'Hdr1:regexp1 !Hdr2:regexp2 #Hdr3:regexp3 !#Hdr4:regexp4 ...'
                 feed article only if:
-                the Hdr1: header matches regexp1
-                and the Hdr2: header does not match regexp2.
+                  the Hdr1: header matches regexp1;
+                  and the Hdr2: header does not match regexp2;
+                also, process the message thus:
+                  if the Hdr3: header matches regexp3, remove that header;
+                  if the Hdr4: header does not match regexp4, remove it.
 
   -M num        maximum number of articles (per group) to process before
                 bailing out.
@@ -730,22 +733,33 @@
                     my ($hdr_un, $val_un) = split(':', $unfolded_art_hdr, 2);
                     $val_un = '' if not defined $val_un;
                     $val_un =~ s/^\s*//;
+                    my $remove_hdr = 0;
                     for my $tuple_match (@hdr_to_match) {
                         my ($hdr_m, $val_m) = split(':', $tuple_match, 2);
                         my $negate_h = ($hdr_m =~ s/^!//);
+                        my $remove_h = ($hdr_m =~ s/^#//);
                         next if lc($hdr_un) ne lc($hdr_m);
-                        $m_found_hdrs{lc($hdr_m)} = 1;
+                        $m_found_hdrs{lc($hdr_m)} = 1 if not $remove_h;
                         if ($negate_h) {
                             if ($val_un =~ /$val_m/i) {
                                 print LOG "\tDEBUGGING $i\t-- $hdr_un [$val_un]\n" if $debug >= 2;
-                                $match_all_hdrs = 0;
+                                if (not $remove_h) {
+                                    $match_all_hdrs = 0;
+                                }
+                            } elsif ($remove_h) {
+                                $remove_hdr = 1;
                             }
                         } elsif (not $val_un =~ /$val_m/i) {
                             print LOG "\tDEBUGGING $i\t++ $hdr_un [$val_un]\n" if $debug >= 2;
-                            $match_all_hdrs = 0;
+                            if (not $remove_h) {
+                                $match_all_hdrs = 0;
+                            }
+                        } elsif ($remove_h) {
+                            $remove_hdr = 1;
                         }
                         last if not $match_all_hdrs;
                     }
+                    push @header_nums_to_go, $idx if $remove_hdr;
                 }
 
                 if (grep { $curr_hdr eq $_ } split(':', $skip_headers)) {
@@ -781,12 +795,12 @@
 
                 $headers = 0 if $article->[$idx] eq "\n";
             }
-            if (@hdr_to_match and (not $match_all_hdrs or @hdr_to_match != scalar(keys %m_found_hdrs))) {
-                print LOG "\tDEBUGGING $i\thdr_skip_art $i\n" if $debug >= 2;
-                $skip_due_to_hdrs = 1;
+            if (@hdr_to_match and (not $match_all_hdrs
+                or scalar(grep { ! /^!?#/ } @hdr_to_match) != keys %m_found_hdrs)) {
+                $skip_due_to_hdrs = 2;
             }
             while (@header_nums_to_go) {
-                my $idx = pop @header_nums_to_go;  # Start from last.
+                my $idx = pop @header_nums_to_go;  # Start from last, so numbers are not affected.
                 my $cut = join("\n\t", splice(@{$article}, $idx, 1));
                 $tx_len -= length($cut);
                 print LOG "\tDEBUGGING $i\tcut1 $cut" if $debug >= 2;



More information about the inn-committers mailing list