Header-only feed patches ....
"Miquel van Smoorenburg"
list-inn-workers at news.cistron.nl
Mon Sep 19 19:08:33 UTC 2005
In article <017a01c5bc79$f7614960$3d64880a at speedy>,
Christiaan den Besten <chris at prolocation.net> wrote:
>I would like to experiment with a (to be written) new overview backend
>for inn. Just to see how it performs, I would like to put all
>overview data in a mysql-db and see if that is better for stuff like XPAT.
I very much doubt it.
>It would be nice if I could send a header-only feed to this testserver .....
>Does anyone still have the patches for inn so it can handle a
>header-only feed from a diablo feeder ?
I've never seen patches for innfeed to do this, so I started working
on it myself some time ago. I still have the patch; it compiles,
but I never actually tested whether it works. If anyone is interested in
picking this up and finishing it, here it is:
inn-2.4.1-headonly.patch
diff -ruN inn-2.4.1-maxartsize.ORIG/doc/man/innfeed.conf.5 inn-2.4.1-maxartsize/doc/man/innfeed.conf.5
--- inn-2.4.1-maxartsize.ORIG/doc/man/innfeed.conf.5 Wed Jan 7 23:47:19 2004
+++ inn-2.4.1-maxartsize/doc/man/innfeed.conf.5 Fri Oct 8 00:10:57 2004
@@ -459,6 +459,21 @@
This key requires a positive integer value. It defines the tcp/ip port
number to use when connecting to the remote.
.TP
+.B headers-only
+This key requires a boolean value. By default it is set to false. When
+set to true, the peer will be sent a headers-only feed. If a Bytes:
+header is not present, one will be added whose value is the length of the
+complete article. This is useful for feeding a diablo server, or an
+overview-only INN server.
+Note that innfeed should send a MODE HEADFEED command to the peer to
+make sure it expects a headers-only feed, and the current code doesn't
+do that yet, partly because INN itself doesn't understand it yet so
+it would be impossible to feed a headers-only feed to INN.
+Make sure that you
+.I never
+.I ever
+send a headers-only feed to a peer that also receives a normal feed!
+.TP
.B drop-deferred
This key requires a boolean value. By default it is set to false. When
set to true, and a peer replies with code 431 or 436 (try again later) just
Binary files inn-2.4.1-maxartsize.ORIG/innfeed/.article.c.swp and inn-2.4.1-maxartsize/innfeed/.article.c.swp differ
diff -ruN inn-2.4.1-maxartsize.ORIG/innfeed/article.c inn-2.4.1-maxartsize/innfeed/article.c
--- inn-2.4.1-maxartsize.ORIG/innfeed/article.c Wed Jan 7 23:47:19 2004
+++ inn-2.4.1-maxartsize/innfeed/article.c Fri Oct 8 00:27:24 2004
@@ -53,7 +53,9 @@
char *fname ; /* the file name of the article */
char *msgid ; /* the msgid of the article (INN tells us) */
Buffer contents ; /* the buffer of the actual on disk stuff */
+ Buffer headers ; /* the buffer of just the headers */
Buffer *nntpBuffers ; /* list of buffers for transmisson */
+ Buffer *nntpHeaderBuffers ; /* list of header buffers for transmission */
const void *mMapping ; /* base of memory mapping, or NULL if none */
bool loggedMissing ; /* true if article is missing and we logged */
bool articleOk ; /* true until we know otherwise. */
@@ -76,21 +78,20 @@
* Private functions
*/
-static Buffer artGetContents (Article article) ; /* Return the buffer that
- fillContents() filled
- up. */
+ /* Return the buffer that fillContents() filled up. */
+static Buffer artGetContents (Article article, bool headersOnly) ;
/* Log statistics on article memory usage. */
static void logArticleStats (TimeoutId id, void *data) ;
-static bool fillContents (Article article) ; /* Read the article's bits
- off the disk. */
+ /* Read the article's bits off the disk. */
+static bool fillContents (Article article, bool HeadersOnly) ;
/* Append buffer B to the buffer array BUFFS. */
static void appendBuffer (Buffer b, Buffer **buffs, int *newSpot, int *curLen);
-static bool prepareArticleForNNTP (Article article) ; /* Do the necessary
- CR-LF stuff */
+ /* Do the necessary CR-LF stuff */
+static bool prepareArticleForNNTP (Article article, bool headersOnly) ;
static bool artFreeContents (Article art) ; /* Tell the Article to release
its contents buffer if
@@ -108,7 +109,7 @@
static unsigned int hashString (const char *string) ;
/* Locates the article with the given message ID, in the has table. */
-static Article hashFindArticle (const char *msgid) ;
+static Article hashFindArticle (const char *msgid);
/* Puts the given article in the hash table. */
static void hashAddArticle (Article article) ;
@@ -184,7 +185,7 @@
Article newArticle (const char *filename, const char *msgid)
{
Article newArt = NULL ;
-
+
TMRstart(TMR_NEWARTICLE);
if (hashTable == NULL)
{ /* first-time through initialization. */
@@ -212,6 +213,7 @@
newArt->msgid = xstrdup (msgid) ;
newArt->contents = NULL ;
+ newArt->headers = NULL ;
newArt->mMapping = NULL ;
newArt->refCount = 1 ;
newArt->loggedMissing = false ;
@@ -235,6 +237,7 @@
newArt->refCount++ ;
d_printf (2,"Reusing existing article for %s\nx",msgid) ;
}
+
TMRstop(TMR_NEWARTICLE);
return newArt ;
}
@@ -268,6 +271,15 @@
if (article->nntpBuffers != NULL)
freeBufferArray (article->nntpBuffers) ;
+ if (article->nntpHeaderBuffers != NULL)
+ freeBufferArray (article->nntpHeaderBuffers) ;
+
+ if (article->headers) {
+ if (article->contents != article->headers)
+ bytesInUse -= bufferDataSize (article->headers) ;
+ delBuffer (article->headers) ;
+ }
+
delBuffer (article->contents) ;
}
@@ -360,11 +372,11 @@
}
/* return true if we have or are able to get the contents off the disk */
-bool artContentsOk (Article article)
+bool artContentsOk (Article article, bool headersOnly)
{
bool rval = false ;
- if ( prepareArticleForNNTP (article) )
+ if ( prepareArticleForNNTP (article, headersOnly) )
rval = true ;
return rval ;
@@ -393,12 +405,13 @@
/* Get a NULL terminated array of Buffers that is ready for sending via NNTP */
-Buffer *artGetNntpBuffers (Article article)
+Buffer *artGetNntpBuffers (Article article, bool headersOnly)
{
- if ( !prepareArticleForNNTP (article) )
+ if ( !prepareArticleForNNTP (article, headersOnly) )
return NULL ;
- return dupBufferArray (article->nntpBuffers) ;
+ return dupBufferArray (headersOnly ? article->nntpHeaderBuffers :
+ article->nntpBuffers) ;
}
@@ -409,21 +422,24 @@
}
/* return size of the article */
-int artSize (Article article)
+int artSize (Article article, bool headersOnly)
{
if (article == NULL || article->contents == NULL)
return (int)0 ;
+ if (headersOnly && article->headers == NULL)
+ return (int)0;
return (int)bufferDataSize(article->contents);
}
/* return how many NNTP-ready buffers the article contains */
-unsigned int artNntpBufferCount (Article article)
+unsigned int artNntpBufferCount (Article article, bool headersOnly)
{
- if ( !prepareArticleForNNTP (article) )
+ if ( !prepareArticleForNNTP (article, headersOnly) )
return 0 ;
- return bufferArrayLen (article->nntpBuffers) ;
+ return bufferArrayLen (headersOnly ? article->nntpHeaderBuffers :
+ article->nntpBuffers) ;
}
@@ -451,17 +467,18 @@
/* return a single buffer that contains the disk image of the article (i.e.
not fixed up for NNTP). */
-static Buffer artGetContents (Article article)
+static Buffer artGetContents (Article article, bool headersOnly)
{
Buffer rval = NULL ;
if (article->articleOk)
{
if (article->contents == NULL)
- fillContents (article) ;
+ fillContents (article, headersOnly) ;
if (article->contents != NULL)
- rval = bufferTakeRef (article->contents) ;
+ rval = bufferTakeRef (headersOnly ? article->headers :
+ article->contents) ;
}
return rval ;
@@ -495,6 +512,53 @@
articleStatsId = prepareSleep (logArticleStats,ARTICLE_STATS_PERIOD,0) ;
}
+/* Scan the header block of an article image. On return *body points at
+   the empty line that ends the headers (or at the end of the scanned data
+   if there is none); *bytes and *control point at the "Bytes:" and
+   "Control:" header lines, or are NULL when that header is absent. */
+static void findBody (char *data, size_t size, bool inWireFormat,
+                      char **body, char **bytes, char **control)
+{
+  char *p, *end;
+  int nlseen = 1;               /* true while p is at the start of a line */
+  *bytes = NULL;
+  *control = NULL;
+  *body = NULL;
+  /* Wire format peeks at p[1], so stop one byte short of the end. */
+  if (inWireFormat && size > 0) size--;
+  end = data + size;
+  for (p = data; p < end; p++) {
+    if (nlseen && (*p == 'b' || *p == 'B') &&
+        (size_t) (end - p) > 6 &&
+        strncasecmp(p, "Bytes:", 6) == 0)
+      *bytes = p;
+    if (nlseen && (*p == 'c' || *p == 'C') &&
+        (size_t) (end - p) > 8 &&
+        strncasecmp(p, "Control:", 8) == 0)
+      *control = p;
+    if (inWireFormat) {
+      if (*p != '\r' && *(p+1) == '\n') {
+        /* A bare LF is not a line ending on the wire; skip it. */
+        p++;
+        nlseen = 0;
+        continue;
+      }
+      if (*p == '\r' && *(p+1) == '\n')
+        p++;
+    }
+    if (*p == '\n') {
+      if (nlseen) {
+        *body = p - (inWireFormat ? 1 : 0);
+        break;
+      }
+      nlseen = 1;
+    } else
+      nlseen = 0;
+  }
+  /* No empty line found: treat the end of the data as the header end. */
+  if (*body == NULL) *body = p;
+}
+
/* do the actual read of the article off disk into a Buffer that is stored
in the Article object. The Article will end up with its contents field
@@ -504,13 +568,15 @@
contents may be copied around after reading to insert a carriage
return before each newline. */
-static bool fillContents (Article article)
+static bool fillContents (Article article, bool headersOnly)
{
+ Buffer body;
int fd = -1;
- char *p;
+ char *p, *b;
static bool maxLimitNotified ;
bool opened;
size_t articlesize = 0;
+ size_t bytes, hdrsize;
char *buffer = NULL ;
int amt = 0 ;
size_t idx = 0, amtToRead ;
@@ -721,13 +787,38 @@
}
}
-
+ /* In header-only mode we need to replace article->contents with
+ * a buffer with just the headers, followed by a Bytes: header. */
+ if (article->articleOk && article->contents && headersOnly) {
+
+ bytes = bufferDataSize (article->contents);
+ buffer = bufferBase (article->contents);
+
+ findBody(buffer, bytes, article->inWireFormat, &p, &bHdr, &cHdr);
+
+ if (cHdr)
+ article->headers = bufferTakeRef (article->contents) ;
+ else if ((article->headers = newBuffer (hdrsize + 24)) != NULL) {
+ p = bufferBase(article->headers);
+ memcpy(p, buffer, hdrsize);
+ if (bHdr == NULL)
+ hdrsize += sprintf(p + hdrsize, "Bytes: %d", bytes);
+ hdrsize += sprintf(p + hdrsize, "%s",
+ article->inWireFormat ? "\r\n\r\n" : "\n\n");
+ bufferSetDataSize (article->headers, hdrsize) ;
+ bytesInUse += hdrsize;
+ byteTotal += hdrsize;
+ } else
+ warn ("ME internal failed to build headfeed buffer") ;
+ }
+
/* If we're not useing storage api, we should close a valid file descriptor */
if (!article->arthandle && (fd >= 0))
close (fd) ;
TMRstop(TMR_READART);
- return (article->contents != NULL ? true : false) ;
+ return ((headersOnly ? article->headers : article->contents) != NULL ?
+ true : false) ;
}
@@ -751,7 +842,7 @@
/* Takes the articles contents buffer and overlays a set of new buffers on
top of it. These buffers insert the required carriage return and dot
characters as needed */
-static bool prepareArticleForNNTP (Article article)
+static bool prepareArticleForNNTP (Article article, bool headersOnly)
{
static Buffer dotFirstBuffer ;
static Buffer dotBuffer ;
@@ -762,14 +853,15 @@
char *start, *end ;
Buffer contents ;
- contents = artGetContents (article) ; /* returns a reference */
+ contents = artGetContents (article, headersOnly) ; /* returns a reference */
TMRstart(TMR_PREPART);
if (contents == NULL) {
TMRstop(TMR_PREPART);
return false ;
}
- else if (article->nntpBuffers != NULL)
+ else if ((!headersOnly && article->nntpBuffers != NULL) ||
+ ( headersOnly && article->nntpHeaderBuffers != NULL))
{
delBuffer (contents) ;
TMRstop(TMR_PREPART);
@@ -821,7 +913,10 @@
delBuffer (contents) ; /* the article is still holding a reference */
- article->nntpBuffers = nntpBuffs ;
+ if (headersOnly)
+ article->nntpHeaderBuffers = nntpBuffs ;
+ else
+ article->nntpBuffers = nntpBuffs ;
TMRstop(TMR_PREPART);
return true ;
}
@@ -845,6 +940,17 @@
}
}
+ if (art->nntpHeaderBuffers != NULL)
+ {
+ if (bufferRefCount (art->nntpHeaderBuffers[0]) > 1)
+ return false ;
+ else
+ {
+ freeBufferArray (art->nntpHeaderBuffers) ;
+ art->nntpHeaderBuffers = NULL ;
+ }
+ }
+
ASSERT (bufferRefCount (art->contents) == 1) ;
if (art->mMapping)
@@ -853,8 +959,10 @@
bytesInUse -= bufferDataSize (art->contents) ;
delBuffer (art->contents) ;
-
art->contents = NULL ;
+
+ if (art->headers) delBuffer (art->headers) ;
+ art->headers = NULL ;
return true ;
}
diff -ruN inn-2.4.1-maxartsize.ORIG/innfeed/article.h inn-2.4.1-maxartsize/innfeed/article.h
--- inn-2.4.1-maxartsize.ORIG/innfeed/article.h Wed Jan 7 23:47:19 2004
+++ inn-2.4.1-maxartsize/innfeed/article.h Thu Oct 7 23:28:16 2004
@@ -44,7 +44,7 @@
/* return true if we have the article's contents (calling this may trigger
the reading off the disk). */
-bool artContentsOk (Article article) ;
+bool artContentsOk (Article article, bool headersOnly) ;
/* increments reference count and returns a copy of article that can be
kept (or passed off to someone else) */
@@ -55,16 +55,16 @@
/* return a list of buffers suitable for giving to an endpoint. The return
value can (must) be given to freeBufferArray */
-Buffer *artGetNntpBuffers (Article article) ;
+Buffer *artGetNntpBuffers (Article article, bool headersOnly) ;
/* return the message id stoed in the article object */
const char *artMsgId (Article article) ;
/* return size of the article */
-int artSize (Article article) ;
+int artSize (Article article, bool headersOnly) ;
/* return the number of buffers that artGetNntpBuffers() would return. */
-unsigned int artNntpBufferCount (Article article) ;
+unsigned int artNntpBufferCount (Article article, bool headersOnly) ;
/* tell the Article class to log (or not) missing articles as they occur. */
void artLogMissingArticles (bool val) ;
diff -ruN inn-2.4.1-maxartsize.ORIG/innfeed/connection.c inn-2.4.1-maxartsize/innfeed/connection.c
--- inn-2.4.1-maxartsize.ORIG/innfeed/connection.c Wed Jan 7 23:47:19 2004
+++ inn-2.4.1-maxartsize/innfeed/connection.c Thu Oct 7 01:25:36 2004
@@ -3062,7 +3062,7 @@
else
{
cxn->takesOkayed++ ;
- cxn->takesSizeOkayed += artSize(artHolder->article);
+ cxn->takesSizeOkayed += artSize(artHolder->article, hostHeadersOnly(cxn->myHost));
remArtHolder (artHolder, &cxn->takeRespHead, &cxn->articleQTotal) ;
if (cxn->articleQTotal == 0)
@@ -3172,7 +3172,7 @@
else
{
cxn->takesRejected++ ;
- cxn->takesSizeRejected += artSize(artHolder->article);
+ cxn->takesSizeRejected += artSize(artHolder->article, hostHeadersOnly(cxn->myHost));
remArtHolder (artHolder, &cxn->takeRespHead, &cxn->articleQTotal) ;
/* Some(?) hosts return the 439 response even before we're done
@@ -3238,7 +3238,7 @@
cxn->takeRespHead = NULL ;
cxn->articleQTotal = 0 ;
cxn->takesOkayed++ ;
- cxn->takesSizeOkayed += artSize(artHolder->article);
+ cxn->takesSizeOkayed += artSize(artHolder->article, hostHeadersOnly(cxn->myHost));
if (cxn->articleQTotal == 0)
cxnIdle (cxn) ;
@@ -3513,7 +3513,7 @@
artHolder = cxn->takeRespHead ;
cxn->takeRespHead = NULL ;
- cxn->takesSizeRejected += artSize(artHolder->article);
+ cxn->takesSizeRejected += artSize(artHolder->article, hostHeadersOnly(cxn->myHost));
/* Some servers return the 437 response before we're done sending. */
if (cxn->articleQTotal == 0 && !writeIsPending (cxn->myEp))
@@ -4050,7 +4050,7 @@
ASSERT (article != NULL) ;
if (cxn->state != cxnClosingS)
- writeArray = artGetNntpBuffers (article) ;
+ writeArray = artGetNntpBuffers (article, hostHeadersOnly(cxn->myHost)) ;
else
writeArray = NULL ;
@@ -4266,8 +4266,8 @@
/* count up all the buffers we'll be writing. One extra each time for
the TAKETHIS command buffer*/
for (p = cxn->takeHead ; p != NULL ; p = p->next)
- if (artContentsOk (p->article))
- lenArray += (1 + artNntpBufferCount (p->article)) ;
+ if (artContentsOk (p->article, hostHeadersOnly(cxn->myHost)))
+ lenArray += (1 + artNntpBufferCount (p->article, hostHeadersOnly(cxn->myHost))) ;
/* now allocate the array for the buffers and put them all in it */
/* 1 for the terminator */
@@ -4287,7 +4287,7 @@
int i, nntpLen ;
article = p->article ;
- nntpLen = artNntpBufferCount (article) ;
+ nntpLen = artNntpBufferCount (article, hostHeadersOnly(cxn->myHost)) ;
msgid = artMsgId (article) ;
if (nntpLen == 0)
@@ -4308,7 +4308,7 @@
}
else
{
- articleBuffers = artGetNntpBuffers (article) ;
+ articleBuffers = artGetNntpBuffers (article, hostHeadersOnly(cxn->myHost)) ;
/* set up the buffer with the TAKETHIS command in it.
12 == strlen ("TAKETHIS \n\r") */
diff -ruN inn-2.4.1-maxartsize.ORIG/innfeed/host.c inn-2.4.1-maxartsize/innfeed/host.c
--- inn-2.4.1-maxartsize.ORIG/innfeed/host.c Wed Jan 7 23:47:19 2004
+++ inn-2.4.1-maxartsize/innfeed/host.c Thu Oct 7 01:24:56 2004
@@ -93,6 +93,7 @@
unsigned int closePeriod;
unsigned int dynamicMethod;
bool wantStreaming;
+ bool headersOnly;
bool dropDeferred;
bool minQueueCxn;
double lowPassLow; /* as percentages */
@@ -504,6 +505,7 @@
params->closePeriod=CLOSE_PERIOD;
params->dynamicMethod=METHOD_STATIC;
params->wantStreaming=STREAM;
+ params->headersOnly=false;
params->dropDeferred=false;
params->minQueueCxn=false;
params->lowPassLow=NOCHECKLOW;
@@ -1286,6 +1288,8 @@
host->params->initialConnections) ;
fprintf (fp,"%s want-streaming : %s\n",indent,
boolToString (host->params->wantStreaming)) ;
+ fprintf (fp,"%s headers-only : %s\n",indent,
+ boolToString (host->params->headersOnly)) ;
fprintf (fp,"%s drop-deferred : %s\n",indent,
boolToString (host->params->dropDeferred)) ;
fprintf (fp,"%s min-queue-connection : %s\n",indent,
@@ -2128,7 +2132,7 @@
{
const char *filename = artFileName (article) ;
const char *msgid = artMsgId (article) ;
- double len = artSize (article);
+ double len = artSize (article, false);
d_printf (5,"Article %s (%s) was transferred\n", msgid, filename) ;
@@ -2191,7 +2195,7 @@
{
const char *filename = artFileName (article) ;
const char *msgid = artMsgId (article) ;
- double len = artSize (article);
+ double len = artSize (article, false);
d_printf (5,"Article %s (%s) was rejected\n", msgid, filename) ;
@@ -2451,6 +2455,11 @@
return host->params->wantStreaming ;
}
+bool hostHeadersOnly (Host host)
+{
+ return host->params->headersOnly ; /* the peer's "headers-only" config key */
+}
+
unsigned int hostMaxChecks (Host host)
{
return host->params->maxChecks ;
@@ -2671,6 +2680,7 @@
GETINT(s,fp,"max-connections",0,LONG_MAX,REQ,p->absMaxConnections, inherit);
GETINT(s,fp,"max-queue-size",1,LONG_MAX,REQ,p->maxChecks, inherit);
GETBOOL(s,fp,"streaming",REQ,p->wantStreaming, inherit);
+ GETBOOL(s,fp,"headers-only",REQ,p->headersOnly, inherit);
GETBOOL(s,fp,"drop-deferred",REQ,p->dropDeferred, inherit);
GETBOOL(s,fp,"min-queue-connection",REQ,p->minQueueCxn, inherit);
GETREAL(s,fp,"no-check-high",0.0,100.0,REQ,p->lowPassHigh, inherit);
diff -ruN inn-2.4.1-maxartsize.ORIG/innfeed/host.h inn-2.4.1-maxartsize/innfeed/host.h
--- inn-2.4.1-maxartsize.ORIG/innfeed/host.h Wed Jan 7 23:47:19 2004
+++ inn-2.4.1-maxartsize/innfeed/host.h Thu Oct 7 01:20:36 2004
@@ -173,6 +173,9 @@
/* return maxChecks */
unsigned int hostmaxChecks (Host host);
+/* return whether or not we're sending a headers-only feed */
+bool hostHeadersOnly (Host host);
+
/* return if we should drop deferred articles */
bool hostDropDeferred (Host host);
diff -ruN inn-2.4.1-maxartsize.ORIG/innfeed/imap_connection.c inn-2.4.1-maxartsize/innfeed/imap_connection.c
--- inn-2.4.1-maxartsize.ORIG/innfeed/imap_connection.c Wed Jan 7 23:47:19 2004
+++ inn-2.4.1-maxartsize/innfeed/imap_connection.c Thu Oct 7 01:23:59 2004
@@ -1045,7 +1045,7 @@
int t;
/* make sure contents ok; this also should load it into memory */
- if (!artContentsOk (art)) {
+ if (!artContentsOk (art, false)) {
d_printf(0, "%s:%d AddControlMsg(): "
"artContentsOk() said article was bad\n",
hostPeerName (cxn->myHost), cxn->ident);
@@ -3897,7 +3897,7 @@
}
/* make sure contents ok; this also should load it into memory */
- res = artContentsOk (item->data.article);
+ res = artContentsOk (item->data.article, false);
if (res==false)
{
if (justadded == item->data.article) {
@@ -3912,7 +3912,7 @@
}
/* Check if it's a control message */
- bufs = artGetNntpBuffers (item->data.article);
+ bufs = artGetNntpBuffers (item->data.article, false);
if (bufs == NULL)
{
/* tell to reject taking this message */
--
The From: and Reply-To: addresses are internal news2mail gateway addresses.
Reply to the list or to "Miquel van Smoorenburg" <miquels at cistron.nl>
More information about the inn-workers
mailing list