INN commit: branches/2.5 (3 files)

INN Commit rra at isc.org
Sat Aug 30 12:06:33 UTC 2014


    Date: Saturday, August 30, 2014 @ 05:06:32
  Author: iulius
Revision: 9665

pullnews:  new -B flag (header-only feeding)

Add a new feature to pullnews:  header-only feeding.

If the article does not already have a Bytes: header field, one is added.
Bodies are kept only for control articles.

Thanks to Geraint Edwards for the patch.

Modified:
  branches/2.5/doc/pod/news.pod
  branches/2.5/doc/pod/pullnews.pod
  branches/2.5/frontends/pullnews.in

-----------------------+
 doc/pod/news.pod      |    5 +--
 doc/pod/pullnews.pod  |   11 +++++-
 frontends/pullnews.in |   77 +++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 76 insertions(+), 17 deletions(-)

Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod	2014-08-30 11:58:35 UTC (rev 9664)
+++ doc/pod/news.pod	2014-08-30 12:06:32 UTC (rev 9665)
@@ -14,8 +14,9 @@
 =item *
 
 Several improvements have been contributed to B<pullnews> by Geraint
-Edwards:  the B<-m> flag now permits to remove headers matching (or not)
-a given regexp, and B<rnews> reporting is improved.
+Edwards:  the new B<-B> flag triggers header-only feeding, the B<-m>
+flag now allows removing headers that match (or do not match) a given
+regexp, and B<rnews> reporting is improved.
 
 =back
 

Modified: doc/pod/pullnews.pod
===================================================================
--- doc/pod/pullnews.pod	2014-08-30 11:58:35 UTC (rev 9664)
+++ doc/pod/pullnews.pod	2014-08-30 12:06:32 UTC (rev 9665)
@@ -4,10 +4,10 @@
 
 =head1 SYNOPSIS
 
-B<pullnews> [B<-hnqRx>] [B<-b> I<fraction>] [B<-c> I<config>] [B<-C> I<width>]
+B<pullnews> [B<-BhnOqRx>] [B<-b> I<fraction>] [B<-c> I<config>] [B<-C> I<width>]
 [B<-d> I<level>] [B<-f> I<fraction>] [B<-F> I<fakehop>] [B<-g> I<groups>]
 [B<-G> I<newsgroups>] [B<-H> I<headers>] [B<-k> I<checkpt>] [B<-l> I<logfile>]
-[B<-m> I<header_pats>] [B<-M> I<num>] [B<-N> I<timeout>] [B<-O>] [B<-p> I<port>]
+[B<-m> I<header_pats>] [B<-M> I<num>] [B<-N> I<timeout>] [B<-p> I<port>]
 [B<-P> I<hop_limit>] [B<-Q> I<level>] [B<-r> I<file>] [B<-s> I<to-server>[:I<port>]]
 [B<-S> I<max-run>] [B<-t> I<retries>] [B<-T> I<connect-pause>] [B<-w> I<num>]
 [B<-z> I<article-pause>] [B<-Z> I<group-pause>] [I<from-server> ...]
@@ -44,6 +44,13 @@
 our high for that group.  When I<fraction> is C<1.0>, pull all the articles on
 a renumbered server.  The default is to do nothing.
 
+=item B<-B>
+
+Feed is header-only, that is to say B<pullnews> only feeds the headers
+of the articles, plus one blank line.  It adds the Bytes: header field
+if the article does not already have one, and keeps the body only if
+the article is a control article.
+
 =item B<-c> I<config>
 
 Normally, the config file is stored in F<~/.pullnews> for the user running

Modified: frontends/pullnews.in
===================================================================
--- frontends/pullnews.in	2014-08-30 11:58:35 UTC (rev 9664)
+++ frontends/pullnews.in	2014-08-30 12:06:32 UTC (rev 9665)
@@ -12,8 +12,8 @@
 #               Full changelog can be found in the Subversion repository of the
 #               INN project.  Major changes are:
 #
-#               January 2010:  Geraint A. Edwards
-#               enable -m to remove headers matching (or not) a given regexp;
+#               January 2010:  Geraint A. Edwards added header-only feeding (-B);
+#               enabled -m to remove headers matching (or not) a given regexp;
 #               minor bug fix to rnews when -O; improved rnews reporting.
 #
 #               December 2008:  Matija Nalis added -O (optimized mode, checking
@@ -114,7 +114,7 @@
 }
 
 $usage =~ s!.*/!!;
-$usage .= " [ -hnOqRx -b fraction -c config -C width -d level
+$usage .= " [ -BhnOqRx -b fraction -c config -C width -d level
         -f fraction -F fakehop -g groups -G newsgroups -H headers
         -k checkpt -l logfile -m header_pats -M num -N num
         -p port -P hop_limit -Q level -r file -s host[:port] -S num
@@ -127,6 +127,10 @@
                 group.  When fraction is 1.0, pull all the articles on
                 the server.  The default is to do nothing.
 
+  -B            feed is header-only (headers plus one blank line).
+                Add the Bytes: header field if needed.  Keep body if
+                control article.
+
   -c config     specify the configuration file instead of the 
                 default of ~/.pullnews (in the running user's home directory).
 
@@ -218,11 +222,11 @@
 ";
 
 
-use vars qw($opt_b $opt_c $opt_C $opt_d $opt_f $opt_F $opt_g $opt_G
+use vars qw($opt_b $opt_B $opt_c $opt_C $opt_d $opt_f $opt_F $opt_g $opt_G
             $opt_h $opt_H $opt_k $opt_l $opt_m $opt_M $opt_n
             $opt_N $opt_O $opt_p $opt_P $opt_q $opt_Q $opt_r $opt_R $opt_s
             $opt_S $opt_t $opt_T $opt_w $opt_x $opt_z $opt_Z);
-getopts("b:c:C:d:f:F:g:G:hH:k:l:m:M:nN:Op:P:qQ:r:Rs:S:t:T:w:xz:Z:") || die $usage;
+getopts("b:Bc:C:d:f:F:g:G:hH:k:l:m:M:nN:Op:P:qQ:r:Rs:S:t:T:w:xz:Z:") || die $usage;
 
 die $usage if $opt_h;
 
@@ -233,6 +237,7 @@
 my $localServer         = $opt_s || $defaultHost;
 my $localPort           = $opt_p || $defaultPort;
 my $quiet               = $opt_q;
+my $header_only         = $opt_B;
 my $watermark           = $opt_w;
 my $retries             = $opt_t || $defaultRetries;
 my $retryTime           = $opt_T || $defaultRetryTime;
@@ -697,22 +702,43 @@
             }
         }
 
-        my $article = $skip_article ? '' : $fromServer->article($i);
+        my $add_bytes_header = 0;
+        my $is_control_art = 0;
+        my $article;
+        if (not $skip_article and $header_only) {
+            $article = $fromServer->head($i);
+            if ($fromServer->code() == 221) {
+                my $has_bytes_header = 0;
+                for my $hdr (@$article) {
+                    if (lc(substr($hdr, 0, 6)) eq 'bytes:') {
+                        $has_bytes_header = 1;
+                    } elsif (lc(substr($hdr, 0, 8)) eq 'control:') {
+                        $is_control_art = 1;
+                        last;
+                    }
+                }
+                $add_bytes_header = 1 if not $has_bytes_header;
+                push @{$article}, "\n" if not $is_control_art;
+            }
+        }
+        if (not $skip_article and (not $header_only or $is_control_art or $add_bytes_header)) {
+            $article = $fromServer->article($i);
+        }
 
         if ($article) {
             my $msgid;
             my $xref = 0;
             my $headers = 1;
-            my $idx;
             my $line_len = 0;
-            my $tx_len = 0;              # Transmitted article length (bytes) (for rnews).
+            my $idx_blank_pre_body;      # Index of the blank line between headers/body.
+            my $tx_len = 0;              # Transmitted article length (bytes) (for rnews, Bytes:).
             my @header_nums_to_go = ();
             my $match_all_hdrs = 1;      # Assume no headers to match.
             my $skip_due_to_hdrs = 0;
             my %m_found_hdrs = ();
             my $curr_hdr = '';
 
-            for ($idx = 0 ; $idx < @{$article} ; $idx++) {
+            for (my $idx = 0 ; $idx < @{$article} ; $idx++) {
                 $line_len = length($article->[$idx]);
                 $len += $line_len;
                 $tx_len += $line_len;
@@ -720,9 +746,13 @@
                 $info{bytes} += $line_len;
 
                 next if not $headers;
+                $idx_blank_pre_body = $idx;
 
                 $curr_hdr = lc($1) if $article->[$idx] =~ /^([^:[:blank:]]+):/;
-                $curr_hdr = '    ' if $article->[$idx] eq "\n";
+                if ($article->[$idx] eq "\n") {
+                    $headers = 0;
+                    next;
+                }
 
                 if ($match_all_hdrs and @hdr_to_match and $article->[$idx] =~ /^[^[:blank:]]/) {
                     # Check header matches -m flag if new header.
@@ -798,8 +828,6 @@
                 if ($opt_x && $article->[$idx] =~ m!^xref:!i) {
                     $xref = 1;
                 }
-
-                $headers = 0 if $article->[$idx] eq "\n";
             }
             if (@hdr_to_match and (not $match_all_hdrs
                 or scalar(grep { ! /^!?#/ } @hdr_to_match) != keys %m_found_hdrs)) {
@@ -809,11 +837,13 @@
                 my $idx = pop @header_nums_to_go;  # Start from last, so numbers are not affected.
                 my $cut = join("\n\t", splice(@{$article}, $idx, 1));
                 $tx_len -= length($cut);
+                $idx_blank_pre_body--;
                 print LOG "\tDEBUGGING $i\tcut1 $cut" if $debug >= 2;
                 while ($article->[$idx] =~ /^[[:space:]](.+)/) {
                     # Folded lines.
                     my $cut = join("\n\t", splice(@{$article}, $idx, 1));
                     $tx_len -= length($cut);
+                    $idx_blank_pre_body--;
                     print LOG "\tDEBUGGING $i\tcut_ $cut" if $debug >= 2;
                 }
             }
@@ -830,10 +860,31 @@
             if ($opt_x && !$xref) {
                 warn "No Xref: header found in article, adding\n";
                 my $xref_h = "Xref: $server $group: $i\n";
-                splice(@{$article}, $idx, 0, $xref_h);
+                splice(@{$article}, $idx_blank_pre_body, 0, $xref_h);
                 $tx_len += length($xref_h);
+                $idx_blank_pre_body++;
             }
 
+            if ($add_bytes_header) {
+                # Compute the number of bytes the same way the :bytes
+                # metadata item would do.  Neither the additional Bytes:
+                # header field nor the headers removed by pullnews are
+                # counted.
+                my $bytes_real_count = $tx_len + scalar(@{$article});
+                my $bytes_h = "Bytes: $bytes_real_count\n";
+                splice(@{$article}, $idx_blank_pre_body, 0, $bytes_h);
+                $tx_len += length($bytes_h);
+                $idx_blank_pre_body++;
+            }
+
+            if ($header_only and not $is_control_art and @{$article} > $idx_blank_pre_body+1) {
+                splice(@{$article}, $idx_blank_pre_body+1);
+                $tx_len = 0;
+                for my $line (@{$article}) {
+                    $tx_len += length($line);
+                }
+            }
+
             $pulled->{$server}->{$group}++;
 
             if ($skip_due_to_hdrs) {



More information about the inn-committers mailing list