INN commit: branches/2.5 (6 files)

INN Commit rra at isc.org
Mon Sep 28 19:59:01 UTC 2009


    Date: Monday, September 28, 2009 @ 12:59:01
  Author: iulius
Revision: 8637

Add a function to validate the encoding of UTF-8 strings.
The "ctlinnd pause", "ctlinnd readers", "ctlinnd reject",
"ctlinnd reserve", "ctlinnd throttle" and "nnrpd -r" commands
now expect a properly encoded reason.

Modified:
  branches/2.5/doc/pod/news.pod
  branches/2.5/include/inn/libinn.h
  branches/2.5/innd/cc.c
  branches/2.5/lib/uwildmat.c
  branches/2.5/nnrpd/nnrpd.c
  branches/2.5/tests/lib/uwildmat-t.c

------------------------+
 doc/pod/news.pod       |    7 +++++++
 include/inn/libinn.h   |    1 +
 innd/cc.c              |   29 +++++++++++++++++++++--------
 lib/uwildmat.c         |   43 ++++++++++++++++++++++++++++++++++++++++++-
 nnrpd/nnrpd.c          |    3 ++-
 tests/lib/uwildmat-t.c |   30 +++++++++++++++++++++++++++++-
 6 files changed, 102 insertions(+), 11 deletions(-)

Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod	2009-09-28 19:57:30 UTC (rev 8636)
+++ doc/pod/news.pod	2009-09-28 19:59:01 UTC (rev 8637)
@@ -130,6 +130,13 @@
 
 =item *
 
+As UTF-8 is the default character set in S<RFC 3977>, the C<ctlinnd pause>,
+C<ctlinnd readers>, C<ctlinnd reject>, C<ctlinnd reserve>, C<ctlinnd throttle>
+and C<nnrpd -r> commands now require the given reason to be encoded in UTF-8,
+so that it can be properly sent to news readers.
+
+=item *
+
 The output of consistency checks for article storage and the F<history>
 file no longer appears by default when C<cnfsstat -a> is used.  A new B<-v>
 flag has been added to B<cnfsstat> so as to see it.

Modified: include/inn/libinn.h
===================================================================
--- include/inn/libinn.h	2009-09-28 19:57:30 UTC (rev 8636)
+++ include/inn/libinn.h	2009-09-28 19:59:01 UTC (rev 8637)
@@ -107,6 +107,7 @@
     UWILDMAT_POISON
 };
 
+extern bool             is_valid_utf8(const char *start);
 extern bool             uwildmat(const char *text, const char *pat);
 extern bool             uwildmat_simple(const char *text, const char *pat);
 extern enum uwildmat    uwildmat_poison(const char *text, const char *pat);

Modified: innd/cc.c
===================================================================
--- innd/cc.c	2009-09-28 19:57:30 UTC (rev 8636)
+++ innd/cc.c	2009-09-28 19:59:01 UTC (rev 8637)
@@ -275,7 +275,7 @@
 
 
 /*
-**  Do the work to allow foreign connectiosn.
+**  Do the work to allow foreign connections.
 */
 static const char *
 CCallow(char *av[])
@@ -1001,6 +1001,13 @@
 	if (CTYPE(isupper, Rest[0]))
 	    Rest[0] = tolower(Rest[0]);
     }
+
+    who = av[2];
+    if (*who == '\0')
+        who = NEWSMASTER;
+    if (!is_valid_utf8(who))
+        return "1 Invalid UTF-8 creator's name";
+
     if (strlen(Name) + strlen(Rest) > SMBUF - 24)
 	return "1 Name too long";
 
@@ -1018,9 +1025,6 @@
 	IOError(WHEN, oerrno);
     }
     else {
-	who = av[2];
-	if (*who == '\0')
-	    who = NEWSMASTER;
         xasprintf(&buff, "%s %ld %s\n", Name, (long) Now.tv_sec, who);
 	if (xwrite(fd, buff, strlen(buff)) < 0) {
 	    oerrno = errno;
@@ -1145,9 +1149,12 @@
     if (*reason == '\0')
 	return CCnoreason;
 
-    if (strlen(reason) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe */
+    if (strlen(reason) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe. */
 	return CCbigreason;
 
+    if (!is_valid_utf8(reason))
+        return "1 Invalid UTF-8 reason";
+
     if (Reservation) {
 	if (strcmp(reason, Reservation) != 0) {
 	    snprintf(CCreply.data, CCreply.size, "1 Reserved \"%s\"",
@@ -1228,8 +1235,10 @@
 	p = av[1];
 	if (*p == '\0')
 	    return CCnoreason;
-	if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe */
+	if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe. */
 	    return CCbigreason;
+        if (!is_valid_utf8(p))
+            return "1 Invalid UTF-8 reason";
 	NNRPReason = xstrdup(p);
 	break;
     }
@@ -1286,8 +1295,10 @@
 {
     if (RejectReason)
 	return "1 Already rejecting";
-    if (strlen(av[0]) > MAX_REASON_LEN)	/* MAX_REASON_LEN is as big as is safe */
+    if (strlen(av[0]) > MAX_REASON_LEN)	/* MAX_REASON_LEN is as big as is safe. */
 	return CCbigreason;
+    if (!is_valid_utf8(av[0]))
+        return "1 Invalid UTF-8 reason";
     RejectReason = xstrdup(av[0]);
     return NULL;
 }
@@ -1452,8 +1463,10 @@
 	/* Trying to make a reservation. */
 	if (Reservation)
 	    return "1 Already reserved";
-	if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe */
+	if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe. */
 	    return CCbigreason;
+        if (!is_valid_utf8(p))
+            return "1 Invalid UTF-8 reason";
 	Reservation = xstrdup(p);
     }
     else {

Modified: lib/uwildmat.c
===================================================================
--- lib/uwildmat.c	2009-09-28 19:57:30 UTC (rev 8636)
+++ lib/uwildmat.c	2009-09-28 19:59:01 UTC (rev 8637)
@@ -93,7 +93,6 @@
     if (end != NULL && (end - start + 1) < length)
         return 1;
     left = length - 1;
-    p = start + 1;
     for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
         left--;
     return (left == 0) ? length : 1;
@@ -101,6 +100,48 @@
 
 
 /*
+**  Check whether a string contains only valid UTF-8 characters.
+*/
+bool
+is_valid_utf8(const char *text)
+{
+    unsigned char mask;
+    const unsigned char *p;
+    int length;
+    int left;
+
+    for (p = (const unsigned char *)text; *p != '\0';) {
+        mask = 0x80;
+        length = 0;
+
+        /* Find out the expected length of the character. */
+        for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
+            length++;
+
+        p++;
+
+        /* Valid ASCII. */
+        if (length == 0)
+            continue;
+        
+        /* Invalid length. */
+        if (length < 2 || length > 6)
+            return false;
+
+        /* Check that each byte looks like 10xxxxxx, except for the first. */
+        left = length - 1;
+        for (; left > 0 && (*p & 0xc0) == 0x80; p++)
+            left--;
+
+        if (left > 0)
+            return false;
+    }
+
+    return true;
+}
+
+
+/*
 **  Convert a UTF-8 character to UCS-4.  Takes a pointer to the start of the
 **  character and to the last octet of the string, and to a uint32_t into
 **  which to put the decoded UCS-4 value.  If end is NULL, expect the string

Modified: nnrpd/nnrpd.c
===================================================================
--- nnrpd/nnrpd.c	2009-09-28 19:57:30 UTC (rev 8636)
+++ nnrpd/nnrpd.c	2009-09-28 19:59:01 UTC (rev 8637)
@@ -1225,7 +1225,8 @@
     /* Were we told to reject connections? */
     if (Reject) {
 	syslog(L_NOTICE, "%s rejected %s", Client.host, Reject);
-	Reply("%d %s\r\n", NNTP_FAIL_TERMINATING, Reject);
+	Reply("%d %s\r\n", NNTP_FAIL_TERMINATING,
+              is_valid_utf8(Reject) ? Reject : "Connection rejected");
 	ExitWithStats(0, false);
     }
 

Modified: tests/lib/uwildmat-t.c
===================================================================
--- tests/lib/uwildmat-t.c	2009-09-28 19:57:30 UTC (rev 8636)
+++ tests/lib/uwildmat-t.c	2009-09-28 19:59:01 UTC (rev 8637)
@@ -44,10 +44,21 @@
         printf("  %s\n  %s\n  expected %d\n", text, pattern, matches);
 }
 
+static void
+test_v(int n, const char *text, bool matches)
+{
+    bool matched;
+
+    matched = is_valid_utf8(text);
+    printf("%sok %d\n", matched == matches ? "" : "not ", n);
+    if (matched != matches)
+        printf("  %s\n  expected %d\n", text, matches);
+}
+
 int
 main(void)
 {
-    test_init(174);
+    test_init(187);
 
     /* Basic wildmat features. */
     test_r(  1, "foo",            "foo",               true);
@@ -249,5 +260,22 @@
     test_r(174, "\303\206\357\277\277",
                                   "*[^\303\206]",      true);
 
+    /* Tests for the is_valid_utf8 interface. */
+    test_v(175, "a",                                   true);
+    test_v(176, "aaabbb",                              true);
+    test_v(177, "test\303\251\302\240!",               true);
+    test_v(178, "\200",                                false);
+    test_v(179, "\277",                                false);
+    test_v(180, "\300 ",                               false);
+    test_v(181, "\340\277",                            false);
+    test_v(182, "\374\277\277\277\277",                false);
+    test_v(183, "\374\277\277\277\277\277",            true);
+    test_v(184, "a\303\251b\303\251c\374\277\277\277\277\277",
+                                                       true);
+    test_v(185, "a\303\251b\303c\374\277\277\277\277\277",
+                                                       false);
+    test_v(186, "",                                    true);
+    test_v(187, "a\303\251b\303\0c",                   false);
+    
     return 0;
 }




More information about the inn-committers mailing list