INN commit: branches/2.5 (6 files)

INN Commit rra at isc.org
Sun Nov 15 09:26:19 UTC 2009


    Date: Sunday, November 15, 2009 @ 01:26:19
  Author: iulius
Revision: 8784

Improve syntax checks on message-IDs:  the checks in nnrpd were based
only on RFC 3977, and those in innd on RFC 1036.
Both are now based on RFC 5536 (USEFOR).

Modified:
  branches/2.5/include/inn/libinn.h
  branches/2.5/innd/art.c
  branches/2.5/innd/innd.h
  branches/2.5/innd/nc.c
  branches/2.5/lib/messageid.c
  branches/2.5/nnrpd/nnrpd.c

----------------------+
 include/inn/libinn.h |    1 
 innd/art.c           |  131 +++++++------------------------------------------
 innd/innd.h          |    1 
 innd/nc.c            |   10 +--
 lib/messageid.c      |  122 +++++++++++++++++++++++++++++++++++----------
 nnrpd/nnrpd.c        |    3 +
 6 files changed, 125 insertions(+), 143 deletions(-)

Modified: include/inn/libinn.h
===================================================================
--- include/inn/libinn.h	2009-11-15 09:26:00 UTC (rev 8783)
+++ include/inn/libinn.h	2009-11-15 09:26:19 UTC (rev 8784)
@@ -151,6 +151,7 @@
 
 /* Headers. */
 extern char *           GenerateMessageID(char *domain);
+extern void             InitializeMessageIDcclass(void);
 extern bool             IsValidMessageID(const char *string);
 extern bool             IsValidHeaderName(const char *string);
 extern void             HeaderCleanFrom(char *from);

Modified: innd/art.c
===================================================================
--- innd/art.c	2009-11-15 09:26:00 UTC (rev 8783)
+++ innd/art.c	2009-11-15 09:26:19 UTC (rev 8784)
@@ -71,15 +71,10 @@
 static char	*ARTpathme;
 
 /*
-**  Flag array, indexed by character.  Character classes for Message-ID's.
+**  Flag array, indexed by character.  Character classes for hostnames.
 */
-static char		ARTcclass[256];
-#define CC_MSGID_ATOM	01
-#define CC_MSGID_NORM	02
-#define CC_HOSTNAME	04
-#define ARTnormchar(c)	((ARTcclass[(unsigned char)(c)] & CC_MSGID_NORM) != 0)
-#define ARTatomchar(c)	((ARTcclass[(unsigned char)(c)] & CC_MSGID_ATOM) != 0)
-#define ARThostchar(c)	((ARTcclass[(unsigned char)(c)] & CC_HOSTNAME) != 0)
+static char             hostcclass[256];
+#define ARThostchar(c)  ((hostcclass[(unsigned char)(c)]) != 0)
 
 #if defined(DO_PERL) || defined(DO_PYTHON)
 const char	*filterPath;
@@ -239,32 +234,28 @@
 void
 ARTsetup(void)
 {
-  const char *	p;
   const ARTHEADER **	table;
+  const unsigned char *p;
   unsigned int	i;
 
   /* Set up the character class tables.  These are written a
    * little strangely to work around a GCC2.0 bug. */
-  memset(ARTcclass, 0, sizeof ARTcclass);
-  p = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+  memset(hostcclass, 0, sizeof(hostcclass));
+
+  p = (const unsigned char*) "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
   while ((i = *p++) != 0) {
-    ARTcclass[i] = CC_HOSTNAME | CC_MSGID_ATOM | CC_MSGID_NORM;
+    hostcclass[i] = 1;
   }
-  p = "!#$%&'*+-/=?^_`{|}~";
-  while ((i = *p++) != 0) {
-    ARTcclass[i] = CC_MSGID_ATOM | CC_MSGID_NORM;
-  }
-  p = "\"(),.:;<@[\\]";
-  while ((i = *p++) != 0) {
-    ARTcclass[i] = CC_MSGID_NORM;
-  }
 
-  /* The RFC's don't require it, but we add underscore to the list of valid
-   * hostname characters. */
-  ARTcclass['.'] |= CC_HOSTNAME;
-  ARTcclass['-'] |= CC_HOSTNAME;
-  ARTcclass['_'] |= CC_HOSTNAME;
+  /* The RFCs don't require it, but we add underscore to the list
+   * of valid hostname characters. */
+  hostcclass['.'] = 1;
+  hostcclass['-'] = 1;
+  hostcclass['_'] = 1;
 
+  /* Also initialize the character class tables for message-IDs. */
+  InitializeMessageIDcclass();
+
   /* Build the header tree. */
   table = xmalloc(ARRAY_SIZE(ARTheaders) * sizeof(ARTHEADER *));
   for (i = 0; i < ARRAY_SIZE(ARTheaders); i++)
@@ -715,87 +706,7 @@
   return;
 }
 
-/*
-**  Check message-ID format based on RFC 5322 grammar, except that (as per
-**  USEFOR, RFC 5536) whitespace, non-printing, and '>' characters are excluded.
-**  Based on code by Paul Eggert posted to news.software.b on 22-Nov-90
-**  in <#*tyo2'~n at twinsun.com>, with additional e-mail discussion.
-**  Thanks, Paul.
-*/
-bool
-ARTidok(const char *MessageID)
-{
-  int		c;
-  const char	*p;
 
-  /* Check the length of the message-ID. */
-  if (MessageID == NULL || strlen(MessageID) > NNTP_MAXLEN_MSGID)
-    return false;
-
-  /* Scan local-part:  "< atom|quoted [ . atom|quoted]" */
-  p = MessageID;
-  if (*p++ != '<')
-    return false;
-  for (; ; p++) {
-    if (ARTatomchar(*p))
-      while (ARTatomchar(*++p))
-	continue;
-    else {
-      if (*p++ != '"')
-	return false;
-      for ( ; ; ) {
-	switch (c = *p++) {
-	case '\\':
-	  c = *p++;
-	  /* FALLTHROUGH */
-	default:
-	  if (ARTnormchar(c))
-	    continue;
-	  return false;
-	case '"':
-	  break;
-	}
-	break;
-      }
-    }
-    if (*p != '.')
-      break;
-  }
-
-  /* Scan domain part:  "@ atom|domain [ . atom|domain] > \0" */
-  if (*p++ != '@')
-    return false;
-  for ( ; ; p++) {
-    if (ARTatomchar(*p))
-      while (ARTatomchar(*++p))
-	continue;
-    else {
-      if (*p++ != '[')
-	return false;
-      for ( ; ; ) {
-	switch (c = *p++) {
-	case '\\':
-	  c = *p++;
-	  /* FALLTHROUGH */
-	default:
-	  if (ARTnormchar(c))
-	    continue;
-	  /* FALLTHROUGH */
-	case '[':
-	  return false;
-	case ']':
-	  break;
-	}
-	break;
-      }
-    }
-    if (*p != '.')
-      break;
-  }
-
-  return *p == '>' && *++p == '\0';
-}
-
 /*
 **  Clean up data field where article informations are stored.
 **  This must be called before article processing.
@@ -1088,8 +999,8 @@
     }
   }
 
-  /* assumes Message-ID header is required header */
-  if (!ARTidok(HDR(HDR__MESSAGE_ID))) {
+  /* Assumes the Message-ID: header is a required header. */
+  if (!IsValidMessageID(HDR(HDR__MESSAGE_ID))) {
     HDR_LEN(HDR__MESSAGE_ID) = 0;
     sprintf(buff, "%d Bad \"Message-ID\" header",
             ihave ? NNTP_FAIL_IHAVE_REJECT : NNTP_FAIL_TAKETHIS_REJECT);
@@ -1297,7 +1208,7 @@
     return;
   }
 
-  if (!ARTidok(MessageID)) {
+  if (!IsValidMessageID(MessageID)) {
     syslog(L_NOTICE, "%s bad cancel Message-ID %s", data->Feedsite,
       MaxLength(MessageID, MessageID));
     TMRstop(TMR_ARTCNCL);
@@ -1356,7 +1267,7 @@
   if (c == 'c' && strncmp(Control, "cancel", 6) == 0) {
     for (p = &Control[6]; ISWHITE(*p); p++)
       continue;
-    if (*p && ARTidok(p))
+    if (*p && IsValidMessageID(p))
       ARTcancel(data, p, false);
     return;
   }
@@ -2627,7 +2538,7 @@
       ARTcontrol(data, HDR(HDR__CONTROL), cp);
     }
     if (DoCancels && HDR_FOUND(HDR__SUPERSEDES)) {
-      if (ARTidok(HDR(HDR__SUPERSEDES)))
+      if (IsValidMessageID(HDR(HDR__SUPERSEDES)))
 	ARTcancel(data, HDR(HDR__SUPERSEDES), false);
     }
   }

Modified: innd/innd.h
===================================================================
--- innd/innd.h	2009-11-15 09:26:00 UTC (rev 8783)
+++ innd/innd.h	2009-11-15 09:26:19 UTC (rev 8784)
@@ -687,7 +687,6 @@
 extern void		ReopenLog(FILE *F);
 extern void		xchown(char *p);
 
-extern bool		ARTidok(const char *MessageID);
 extern bool		ARTreadschema(void);
 extern const char   *	ARTreadarticle(char *files);
 extern char	    *   ARTreadheader(char *files);

Modified: innd/nc.c
===================================================================
--- innd/nc.c	2009-11-15 09:26:00 UTC (rev 8783)
+++ innd/nc.c	2009-11-15 09:26:19 UTC (rev 8784)
@@ -322,7 +322,7 @@
         return;
     }
 
-    if (!ARTidok(cp->av[1])) {
+    if (!IsValidMessageID(cp->av[1])) {
         xasprintf(&buff, "%d Syntax error in message-ID", NNTP_ERR_SYNTAX);
         NCwritereply(cp, buff);
         free(buff);
@@ -388,7 +388,7 @@
         return;
     }
 
-    if (!ARTidok(cp->av[1])) {
+    if (!IsValidMessageID(cp->av[1])) {
         xasprintf(&buff, "%d Syntax error in message-ID", NNTP_ERR_SYNTAX);
         NCwritereply(cp, buff);
         free(buff);
@@ -605,7 +605,7 @@
     cp->Ihave++;
     cp->Start = cp->Next;
 
-    if (!ARTidok(cp->av[1])) {
+    if (!IsValidMessageID(cp->av[1])) {
         /* Return 435 here instead of 501 for compatibility reasons. */
         xasprintf(&buff, "%d Syntax error in message-ID", NNTP_FAIL_IHAVE_REFUSE);
         NCwritereply(cp, buff);
@@ -1705,7 +1705,7 @@
             cp->Sendid.size = MAXHEADERSIZE;
         cp->Sendid.data = xmalloc(cp->Sendid.size);
     }
-    if (!ARTidok(cp->av[1])) {
+    if (!IsValidMessageID(cp->av[1])) {
 	snprintf(cp->Sendid.data, cp->Sendid.size, "%d %s",
                  NNTP_FAIL_CHECK_REFUSE, cp->av[1]);
 	NCwritereply(cp, cp->Sendid.data);
@@ -1797,7 +1797,7 @@
     cp->Takethis++;
     cp->Start = cp->Next;
 
-    if (!ARTidok(cp->av[1])) {
+    if (!IsValidMessageID(cp->av[1])) {
 	syslog(L_NOTICE, "%s bad_messageid %s", CHANname(cp),
                MaxLength(cp->av[1], cp->av[1]));
     }

Modified: lib/messageid.c
===================================================================
--- lib/messageid.c	2009-11-15 09:26:00 UTC (rev 8783)
+++ lib/messageid.c	2009-11-15 09:26:19 UTC (rev 8784)
@@ -10,10 +10,20 @@
 
 #include "inn/innconf.h"
 #include "inn/libinn.h"
+#include "inn/nntp.h"
 
-/*  Scale time back a bit, for shorter message-ID's. */
+/*  Scale time back a bit, for shorter message-IDs. */
 #define OFFSET	673416000L
 
+/*
+**  Flag array, indexed by character.  Character classes for message-IDs.
+*/
+static char             midcclass[256];
+#define CC_MSGID_ATOM   01
+#define CC_MSGID_NORM   02
+#define midnormchar(c)  ((midcclass[(unsigned char)(c)] & CC_MSGID_NORM) != 0)
+#define midatomchar(c)  ((midcclass[(unsigned char)(c)] & CC_MSGID_ATOM) != 0)
+
 char *
 GenerateMessageID(char *domain)
 {
@@ -41,7 +51,37 @@
 
 
 /*
-**  We currently only check the requirements for RFC 3977:
+**  Initialize the character class tables.
+*/
+void
+InitializeMessageIDcclass(void)
+{
+    const unsigned char *p;
+    unsigned int i;
+
+    /* Set up the character class tables.  These are written a
+     * little strangely to work around a GCC2.0 bug. */
+    memset(midcclass, 0, sizeof(midcclass));
+        
+    p = (const unsigned char*) "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    while ((i = *p++) != 0) {
+        midcclass[i] = CC_MSGID_ATOM | CC_MSGID_NORM;
+    }
+      
+    p = (const unsigned char*) "!#$%&'*+-/=?^_`{|}~";
+    while ((i = *p++) != 0) {
+        midcclass[i] = CC_MSGID_ATOM | CC_MSGID_NORM;
+    }
+
+    p = (const unsigned char*) "\"(),.:;<@";
+    while ((i = *p++) != 0) {
+        midcclass[i] = CC_MSGID_NORM;
+    }
+}
+
+
+/*
+**  According to RFC 3977:
 **
 **    o  A message-ID MUST begin with "<", end with ">", and MUST NOT
 **       contain the latter except at the end.
@@ -50,42 +90,70 @@
 **
 **    o  A message-ID MUST NOT contain octets other than printable US-ASCII
 **       characters.
+**
+**  Besides, we check message-ID format based on RFC 5322 grammar, except that
+**  (as per USEFOR, RFC 5536) whitespace, non-printing, and '>' characters
+**  are excluded.
+**  Based on code by Paul Eggert posted to news.software.b on 22-Nov-90
+**  in <#*tyo2'~n at twinsun.com>, with additional e-mail discussion.
+**  Thanks, Paul, for the original implementation based upon RFC 1036.
+**  Updated to RFC 5536 by Julien Elie.
 */
 bool
-IsValidMessageID(const char *string)
+IsValidMessageID(const char *MessageID)
 {
-    int len = 0;
+    int c;
     const unsigned char *p;
 
-    /* Not NULL. */
-    if (string == NULL)
+    /* Check the length of the message-ID. */
+    if (MessageID == NULL || strlen(MessageID) > NNTP_MAXLEN_MSGID)
         return false;
 
-    p = (const unsigned char *) string;
+    p = (const unsigned char *) MessageID;
 
-    /* Begins with "<". */
-    if (p[0] != '<')
+    /* Scan local-part:  "< dot-atom-text". */
+    if (*p++ != '<')
         return false;
-
-    for (; *p != '\0'; p++) {
-        len++;
-        /* Contains ">" *only* at the end. */
-        if (*p == '>') {
-            p++;
-            if (*p != '\0')
+    for (; ; p++) {
+        if (midatomchar(*p)) {
+            while (midatomchar(*++p))
+                continue;
+        } else {
+            return false;
+        }
+        if (*p != '.')
+            break;
+    }
+    
+    /* Scan domain part:  "@ dot-atom-text|no-fold-literal > \0" */
+    if (*p++ != '@')
+        return false;
+    for ( ; ; p++) {
+        if (midatomchar(*p)) {
+            while (midatomchar(*++p))
+                continue;
+        } else {
+            /* no-fold-literal only */
+            if (p[-1] != '@' || *p++ != '[')
                 return false;
-            else
+            for ( ; ; ) {
+                switch (c = *p++) {
+                    default:
+                        if (midnormchar(c)) {
+                            continue;
+                        } else {
+                            return false;
+                        }
+                    case ']':
+                        break;
+                }
                 break;
+            }
+            break;
         }
-        /* Contains only printable US-ASCII characters. */
-        if (!CTYPE(isgraph, *p))
-            return false;
+        if (*p != '.')
+            break;
     }
-
-    /* Between 3 and 250 octets in length.
-     * Ends with ">". */
-    if (len < 3 || len > 250 || p[-1] != '>')
-        return false;
-    else
-        return true;
+    
+    return *p == '>' && *++p == '\0';
 }

Modified: nnrpd/nnrpd.c
===================================================================
--- nnrpd/nnrpd.c	2009-11-15 09:26:00 UTC (rev 8783)
+++ nnrpd/nnrpd.c	2009-11-15 09:26:19 UTC (rev 8784)
@@ -898,6 +898,9 @@
     message_handlers_warn(1, message_log_syslog_warning);
     message_handlers_notice(1, message_log_syslog_notice);
 
+    /* Initialize the character class tables for message-IDs. */
+    InitializeMessageIDcclass();
+
     if (!innconf_read(NULL))
         exit(1);
 




More information about the inn-committers mailing list