PATCH: diablo style hashfeeds
"Miquel van Smoorenburg"
list-inn-workers at news.cistron.nl
Sun Mar 23 13:13:01 UTC 2008
This patch makes it possible to configure so-called 'hashfeeds':
an article's message-id is hashed to an integer value and you
can send articles to a peer based on the range(s) the value is in.
It's compatible with diablo, both new-style and old (pre-5.1) style.
inn-2.4.3-hashfeed.patch
diff -ruN t/inn-2.4.3/doc/pod/newsfeeds.pod inn-2.4.3/doc/pod/newsfeeds.pod
--- t/inn-2.4.3/doc/pod/newsfeeds.pod 2006-03-20 05:14:57.000000000 +0100
+++ inn-2.4.3/doc/pod/newsfeeds.pod 2008-03-15 15:02:41.000000000 +0100
@@ -382,6 +382,30 @@
new process will run with. This flag can be used to raise the priority to
normal if you're using the I<nicekids> parameter in F<inn.conf>.
+=item B<Q> I<hashfeed>
+
+The hashfeed match expression for this site. It must be in the form
+"value/mod" or "start-end/mod". The message-id of the article is
+hashed using MD5, and then the lowest 32 bits are taken as the "hashfeed
+value". If the hashfeed value modulo "mod" plus one equals "value"
+or is between "start" and "end" (inclusive), the article will be fed to the site.
+
+Example:
+
+ Q1/2 Feeds about 50% of the messages to this site
+ Q2/2 Feeds the other 50% of the messages
+
+ Q1-3/10 Feeds about 30% of the messages
+ Q4-5/10 Feeds about 20% of the messages
+ Q6-10/10 Feeds about 50% of the messages
+
+If this flag is specified multiple times the contents will be
+logically ORed together (just one match needed).
+
+The algorithm is compatible with the one used by Diablo 5.1 and up.
+If you want to use the hashing method Diablo used before 5.1,
+put an '@' sign just after the 'Q' (for example: Q@1/2).
+
=item B<S> I<size>
If the amount of data queued for the site gets to be larger than I<size>
diff -ruN t/inn-2.4.3/innd/art.c inn-2.4.3/innd/art.c
--- t/inn-2.4.3/innd/art.c 2006-03-20 05:14:57.000000000 +0100
+++ inn-2.4.3/innd/art.c 2008-03-15 16:20:21.000000000 +0100
@@ -9,6 +9,7 @@
#include "inn/innconf.h"
#include "inn/wire.h"
+#include "inn/md5.h"
#include "innd.h"
#include "ov.h"
#include "storage.h"
@@ -1553,6 +1562,74 @@
}
/*
+** Return four bytes of the MD5 digest of the message-id exactly as given,
+** read in network byte order starting at digest byte (12 - offset).  The
+** MD5sum we already have cannot be reused since HashMessageID() lowercases
+** the message-id first.  An offset outside 0..12 yields 0.
+*/
+static unsigned int
+HashFeedMD5(char *MessageID, int offset)
+{
+    static struct md5_context md5;
+    static char *cachedPtr;
+    static char cachedId[128];
+    unsigned int word;
+    bool cached;
+
+    if (offset < 0 || offset > 12)
+        return 0;
+
+    /* Reuse the previous digest when asked about the same message-id. */
+    cached = MessageID == cachedPtr && strcmp(MessageID, cachedId) == 0;
+    if (!cached) {
+        md5_init(&md5);
+        md5_update(&md5, MessageID, strlen(MessageID));
+        md5_final(&md5);
+        cachedPtr = MessageID;
+        strncpy(cachedId, MessageID, sizeof(cachedId) - 1);
+        cachedId[sizeof(cachedId) - 1] = 0;
+    }
+
+    memcpy(&word, &md5.digest[12 - offset], 4);
+    return ntohl(word);
+}
+
+/*
+** Return true if an element of the HASHFEEDLIST matches the hash of the
+** message-id.  Elements with a non-positive modulus or an unknown type
+** never match; they are skipped rather than dividing by zero or looping.
+*/
+static bool
+HashFeedMatch(HASHFEEDLIST *hf, char *MessageID)
+{
+    unsigned char *p;
+    unsigned int qhash = 0;
+    unsigned int h;
+
+    /* Old diablo (< 5.1) quickhash: the sum of the message-id bytes. */
+    for (p = (unsigned char *)MessageID; *p; p++)
+        qhash += *p;
+
+    /* Advancing in the loop header keeps "continue" from spinning. */
+    for (; hf; hf = hf->next) {
+        if (hf->mod <= 0)
+            continue;
+        if (hf->type == HASHFEED_MD5)
+            h = HashFeedMD5(MessageID, hf->offset);
+        else if (hf->type == HASHFEED_QH)
+            h = qhash;
+        else
+            continue;
+        /* Buckets are numbered 1..mod, hence the "+ 1". */
+        h = h % (unsigned int)hf->mod + 1;
+        if (h >= (unsigned int)hf->begin && h <= (unsigned int)hf->end)
+            return true;
+    }
+
+    return false;
+}
+
+/*
** Propagate an article to the sites have "expressed an interest."
*/
static void
@@ -1625,6 +1702,11 @@
* cross-posting. */
continue;
+ if (sp->HashFeedList &&
+ !HashFeedMatch(sp->HashFeedList, HDR(HDR__MESSAGE_ID)))
+ /* hashfeed doesn't match */
+ continue;
+
if (list && *list != NULL && sp->Distributions &&
!DISTwantany(sp->Distributions, list))
/* Not in the site's desired list of distributions. */
diff -ruN t/inn-2.4.3/innd/innd.h inn-2.4.3/innd/innd.h
--- t/inn-2.4.3/innd/innd.h 2006-03-20 05:14:57.000000000 +0100
+++ inn-2.4.3/innd/innd.h 2008-03-15 14:51:05.000000000 +0100
@@ -407,6 +409,22 @@
/*
+** Diablo-style hashed feeds or hashfeeds.
+*/
+#define HASHFEED_QH 1  /* old diablo (< 5.1) quickhash of the message-id */
+#define HASHFEED_MD5 2  /* MD5 of the message-id */
+
+typedef struct _HASHFEEDLIST {  /* one Q flag of a site; singly linked */
+ int type;  /* HASHFEED_QH or HASHFEED_MD5 */
+ int begin;  /* first matching bucket, compared against (hash % mod) + 1 */
+ int end;  /* last matching bucket (inclusive); equals begin for "x/z" */
+ int mod;  /* modulus applied to the hash value */
+ int offset;  /* value after ':' (0 if absent); picks the MD5 digest bytes */
+ struct _HASHFEEDLIST *next;  /* next entry, or NULL at the end of the list */
+} HASHFEEDLIST;
+
+
+/*
** A site may reject something in its subscription list if it has
** too many hops, or a bad distribution.
*/
@@ -458,6 +476,7 @@
struct buffer Buffer;
bool Buffered;
char ** Originator;
+ HASHFEEDLIST * HashFeedList;
int Next;
int Prev;
} SITE;
diff -ruN t/inn-2.4.3/innd/newsfeeds.c inn-2.4.3/innd/newsfeeds.c
--- t/inn-2.4.3/innd/newsfeeds.c 2006-03-20 05:14:57.000000000 +0100
+++ inn-2.4.3/innd/newsfeeds.c 2008-03-15 14:55:15.000000000 +0100
@@ -448,6 +448,7 @@
int isp;
SITE *nsp;
struct buffer b;
+ HASHFEEDLIST *hf;
b = sp->Buffer;
*sp = SITEnull;
@@ -467,6 +468,7 @@
sp->NeedOverviewCreation = false;
sp->FeedwithoutOriginator = false;
sp->DropFiltered = false;
+ sp->HashFeedList = NULL;
/* Nip off the first field, the site name. */
if ((f2 = strchr(Entry, NF_FIELD_SEP)) == NULL)
@@ -603,6 +605,29 @@
if (*++p && CTYPE(isdigit, *p))
sp->Nice = atoi(p);
break;
+ case 'Q':
+     /* Hashfeed: Q[@]begin[-end]/mod[:offset]; '@' = old quickhash. */
+     hf = xmalloc(sizeof(HASHFEEDLIST));
+     hf->type = (*++p == '@') ? HASHFEED_QH : HASHFEED_MD5;
+     if (hf->type == HASHFEED_QH)
+         p++;
+     u = strchr(p, ':');
+     hf->offset = (u != NULL) ? atoi(u + 1) : 0;
+     if (sscanf(p, "%d-%d/%d", &hf->begin, &hf->end, &hf->mod) != 3) {
+         if (sscanf(p, "%d/%d", &hf->begin, &hf->mod) != 2) {
+             free(hf);
+             return "hash not in x/z or x-y/z format";
+         }
+         hf->end = hf->begin;
+     }
+     /* mod is used as a divisor when matching: it must be positive. */
+     if (hf->mod <= 0) {
+         free(hf);
+         return "hash modulus must be greater than zero";
+     }
+     hf->next = sp->HashFeedList;
+     sp->HashFeedList = hf;
+     break;
case 'S':
if (*++p && CTYPE(isdigit, *p))
sp->StartSpooling = atol(p);
diff -ruN t/inn-2.4.3/innd/site.c inn-2.4.3/innd/site.c
--- t/inn-2.4.3/innd/site.c 2006-03-20 05:14:57.000000000 +0100
+++ inn-2.4.3/innd/site.c 2008-03-15 14:52:31.000000000 +0100
@@ -997,6 +1004,7 @@
SITEfree(SITE *sp)
{
SITE *s;
+ HASHFEEDLIST *hf, *hn;
int new;
int i;
@@ -1051,6 +1059,13 @@
sp->FNLnames.data = NULL;
sp->FNLnames.size = 0;
}
+ if (sp->HashFeedList) {
+ for (hf = sp->HashFeedList; hf; hf = hn) {
+ hn = hf->next;
+ free(hf);
+ }
+ sp->HashFeedList = NULL;
+ }
/* If this site was a master, find a new one. */
if (sp->IsMaster) {
--
The From: and Reply-To: addresses are internal news2mail gateway addresses.
Reply to the list or to "Miquel van Smoorenburg" <miquels at cistron.nl>
More information about the inn-workers
mailing list