INN commit: branches/2.5 (3 files)
INN Commit
rra at isc.org
Sat Aug 30 12:06:33 UTC 2014
Date: Saturday, August 30, 2014 @ 05:06:32
Author: iulius
Revision: 9665
pullnews: new -B flag (header-only feeding)
Add a new feature to pullnews: header-only feeding.
If the article does not already have a Bytes: header field, one is added.
Bodies are kept only for control articles.
Thanks to Geraint Edwards for the patch.
Modified:
branches/2.5/doc/pod/news.pod
branches/2.5/doc/pod/pullnews.pod
branches/2.5/frontends/pullnews.in
-----------------------+
doc/pod/news.pod | 5 +--
doc/pod/pullnews.pod | 11 +++++-
frontends/pullnews.in | 77 +++++++++++++++++++++++++++++++++++++++---------
3 files changed, 76 insertions(+), 17 deletions(-)
Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod 2014-08-30 11:58:35 UTC (rev 9664)
+++ doc/pod/news.pod 2014-08-30 12:06:32 UTC (rev 9665)
@@ -14,8 +14,9 @@
=item *
Several improvements have been contributed to B<pullnews> by Geraint
-Edwards: the B<-m> flag now permits to remove headers matching (or not)
-a given regexp, and B<rnews> reporting is improved.
+Edwards: the new B<-B> flag triggers header-only feeding, the B<-m>
+flag now permits to remove headers matching (or not) a given regexp,
+and B<rnews> reporting is improved.
=back
Modified: doc/pod/pullnews.pod
===================================================================
--- doc/pod/pullnews.pod 2014-08-30 11:58:35 UTC (rev 9664)
+++ doc/pod/pullnews.pod 2014-08-30 12:06:32 UTC (rev 9665)
@@ -4,10 +4,10 @@
=head1 SYNOPSIS
-B<pullnews> [B<-hnqRx>] [B<-b> I<fraction>] [B<-c> I<config>] [B<-C> I<width>]
+B<pullnews> [B<-BhnOqRx>] [B<-b> I<fraction>] [B<-c> I<config>] [B<-C> I<width>]
[B<-d> I<level>] [B<-f> I<fraction>] [B<-F> I<fakehop>] [B<-g> I<groups>]
[B<-G> I<newsgroups>] [B<-H> I<headers>] [B<-k> I<checkpt>] [B<-l> I<logfile>]
-[B<-m> I<header_pats>] [B<-M> I<num>] [B<-N> I<timeout>] [B<-O>] [B<-p> I<port>]
+[B<-m> I<header_pats>] [B<-M> I<num>] [B<-N> I<timeout>] [B<-p> I<port>]
[B<-P> I<hop_limit>] [B<-Q> I<level>] [B<-r> I<file>] [B<-s> I<to-server>[:I<port>]]
[B<-S> I<max-run>] [B<-t> I<retries>] [B<-T> I<connect-pause>] [B<-w> I<num>]
[B<-z> I<article-pause>] [B<-Z> I<group-pause>] [I<from-server> ...]
@@ -44,6 +44,13 @@
our high for that group. When I<fraction> is C<1.0>, pull all the articles on
a renumbered server. The default is to do nothing.
+=item B<-B>
+
+Feed is header-only, that is to say B<pullnews> only feeds the headers
+of the articles, plus one blank line. It adds the Bytes: header field
+if the article does not already have one, and keeps the body only if
+the article is a control article.
+
=item B<-c> I<config>
Normally, the config file is stored in F<~/.pullnews> for the user running
Modified: frontends/pullnews.in
===================================================================
--- frontends/pullnews.in 2014-08-30 11:58:35 UTC (rev 9664)
+++ frontends/pullnews.in 2014-08-30 12:06:32 UTC (rev 9665)
@@ -12,8 +12,8 @@
# Full changelog can be found in the Subversion repository of the
# INN project. Major changes are:
#
-# January 2010: Geraint A. Edwards
-# enable -m to remove headers matching (or not) a given regexp;
+# January 2010: Geraint A. Edwards added header-only feeding (-B);
+# enabled -m to remove headers matching (or not) a given regexp;
# minor bug fix to rnews when -O; improved rnews reporting.
#
# December 2008: Matija Nalis added -O (optimized mode, checking
@@ -114,7 +114,7 @@
}
$usage =~ s!.*/!!;
-$usage .= " [ -hnOqRx -b fraction -c config -C width -d level
+$usage .= " [ -BhnOqRx -b fraction -c config -C width -d level
-f fraction -F fakehop -g groups -G newsgroups -H headers
-k checkpt -l logfile -m header_pats -M num -N num
-p port -P hop_limit -Q level -r file -s host[:port] -S num
@@ -127,6 +127,10 @@
group. When fraction is 1.0, pull all the articles on
the server. The default is to do nothing.
+ -B feed is header-only (headers plus one blank line).
+ Add the Bytes: header field if needed. Keep body if
+ control article.
+
-c config specify the configuration file instead of the
default of ~/.pullnews (in the running user's home directory).
@@ -218,11 +222,11 @@
";
-use vars qw($opt_b $opt_c $opt_C $opt_d $opt_f $opt_F $opt_g $opt_G
+use vars qw($opt_b $opt_B $opt_c $opt_C $opt_d $opt_f $opt_F $opt_g $opt_G
$opt_h $opt_H $opt_k $opt_l $opt_m $opt_M $opt_n
$opt_N $opt_O $opt_p $opt_P $opt_q $opt_Q $opt_r $opt_R $opt_s
$opt_S $opt_t $opt_T $opt_w $opt_x $opt_z $opt_Z);
-getopts("b:c:C:d:f:F:g:G:hH:k:l:m:M:nN:Op:P:qQ:r:Rs:S:t:T:w:xz:Z:") || die $usage;
+getopts("b:Bc:C:d:f:F:g:G:hH:k:l:m:M:nN:Op:P:qQ:r:Rs:S:t:T:w:xz:Z:") || die $usage;
die $usage if $opt_h;
@@ -233,6 +237,7 @@
my $localServer = $opt_s || $defaultHost;
my $localPort = $opt_p || $defaultPort;
my $quiet = $opt_q;
+my $header_only = $opt_B;
my $watermark = $opt_w;
my $retries = $opt_t || $defaultRetries;
my $retryTime = $opt_T || $defaultRetryTime;
@@ -697,22 +702,43 @@
}
}
- my $article = $skip_article ? '' : $fromServer->article($i);
+ my $add_bytes_header = 0;
+ my $is_control_art = 0;
+ my $article;
+ if (not $skip_article and $header_only) {
+ $article = $fromServer->head($i);
+ if ($fromServer->code() == 221) {
+ my $has_bytes_header = 0;
+ for my $hdr (@$article) {
+ if (lc(substr($hdr, 0, 6)) eq 'bytes:') {
+ $has_bytes_header = 1;
+ } elsif (lc(substr($hdr, 0, 8)) eq 'control:') {
+ $is_control_art = 1;
+ last;
+ }
+ }
+ $add_bytes_header = 1 if not $has_bytes_header;
+ push @{$article}, "\n" if not $is_control_art;
+ }
+ }
+ if (not $skip_article and (not $header_only or $is_control_art or $add_bytes_header)) {
+ $article = $fromServer->article($i);
+ }
if ($article) {
my $msgid;
my $xref = 0;
my $headers = 1;
- my $idx;
my $line_len = 0;
- my $tx_len = 0; # Transmitted article length (bytes) (for rnews).
+ my $idx_blank_pre_body; # Index of the blank line between headers/body.
+ my $tx_len = 0; # Transmitted article length (bytes) (for rnews, Bytes:).
my @header_nums_to_go = ();
my $match_all_hdrs = 1; # Assume no headers to match.
my $skip_due_to_hdrs = 0;
my %m_found_hdrs = ();
my $curr_hdr = '';
- for ($idx = 0 ; $idx < @{$article} ; $idx++) {
+ for (my $idx = 0 ; $idx < @{$article} ; $idx++) {
$line_len = length($article->[$idx]);
$len += $line_len;
$tx_len += $line_len;
@@ -720,9 +746,13 @@
$info{bytes} += $line_len;
next if not $headers;
+ $idx_blank_pre_body = $idx;
$curr_hdr = lc($1) if $article->[$idx] =~ /^([^:[:blank:]]+):/;
- $curr_hdr = ' ' if $article->[$idx] eq "\n";
+ if ($article->[$idx] eq "\n") {
+ $headers = 0;
+ next;
+ }
if ($match_all_hdrs and @hdr_to_match and $article->[$idx] =~ /^[^[:blank:]]/) {
# Check header matches -m flag if new header.
@@ -798,8 +828,6 @@
if ($opt_x && $article->[$idx] =~ m!^xref:!i) {
$xref = 1;
}
-
- $headers = 0 if $article->[$idx] eq "\n";
}
if (@hdr_to_match and (not $match_all_hdrs
or scalar(grep { ! /^!?#/ } @hdr_to_match) != keys %m_found_hdrs)) {
@@ -809,11 +837,13 @@
my $idx = pop @header_nums_to_go; # Start from last, so numbers are not affected.
my $cut = join("\n\t", splice(@{$article}, $idx, 1));
$tx_len -= length($cut);
+ $idx_blank_pre_body--;
print LOG "\tDEBUGGING $i\tcut1 $cut" if $debug >= 2;
while ($article->[$idx] =~ /^[[:space:]](.+)/) {
# Folded lines.
my $cut = join("\n\t", splice(@{$article}, $idx, 1));
$tx_len -= length($cut);
+ $idx_blank_pre_body--;
print LOG "\tDEBUGGING $i\tcut_ $cut" if $debug >= 2;
}
}
@@ -830,10 +860,31 @@
if ($opt_x && !$xref) {
warn "No Xref: header found in article, adding\n";
my $xref_h = "Xref: $server $group: $i\n";
- splice(@{$article}, $idx, 0, $xref_h);
+ splice(@{$article}, $idx_blank_pre_body, 0, $xref_h);
$tx_len += length($xref_h);
+ $idx_blank_pre_body++;
}
+ if ($add_bytes_header) {
+ # Compute the number of bytes the same way the :bytes
+ # metadata item would do. The additional Bytes: header
+ # field is not counted, as well as headers removed by
+ # pullnews.
+ my $bytes_real_count = $tx_len + scalar(@{$article});
+ my $bytes_h = "Bytes: $bytes_real_count\n";
+ splice(@{$article}, $idx_blank_pre_body, 0, $bytes_h);
+ $tx_len += length($bytes_h);
+ $idx_blank_pre_body++;
+ }
+
+ if ($header_only and not $is_control_art and @{$article} > $idx_blank_pre_body+1) {
+ splice(@{$article}, $idx_blank_pre_body+1);
+ $tx_len = 0;
+ for my $line (@{$article}) {
+ $tx_len += length($line);
+ }
+ }
+
$pulled->{$server}->{$group}++;
if ($skip_due_to_hdrs) {
More information about the inn-committers mailing list