INN commit: branches/2.5 (6 files)
INN Commit
rra at isc.org
Mon Sep 28 19:59:01 UTC 2009
Date: Monday, September 28, 2009 @ 12:59:01
Author: iulius
Revision: 8637
Add a function to validate the encoding of UTF-8 strings.
"ctlinnd pause", "ctlinnd readers", "ctlinnd reject",
"ctlinnd reserve", "ctlinnd throttle" and "nnrpd -r" commands
now expect a reason that is properly encoded in UTF-8.
Modified:
branches/2.5/doc/pod/news.pod
branches/2.5/include/inn/libinn.h
branches/2.5/innd/cc.c
branches/2.5/lib/uwildmat.c
branches/2.5/nnrpd/nnrpd.c
branches/2.5/tests/lib/uwildmat-t.c
------------------------+
doc/pod/news.pod | 7 +++++++
include/inn/libinn.h | 1 +
innd/cc.c | 29 +++++++++++++++++++++--------
lib/uwildmat.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
nnrpd/nnrpd.c | 3 ++-
tests/lib/uwildmat-t.c | 30 +++++++++++++++++++++++++++++-
6 files changed, 102 insertions(+), 11 deletions(-)
Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod 2009-09-28 19:57:30 UTC (rev 8636)
+++ doc/pod/news.pod 2009-09-28 19:59:01 UTC (rev 8637)
@@ -130,6 +130,13 @@
=item *
+As UTF-8 is the default character set in S<RFC 3977>, the C<ctlinnd pause>,
+C<ctlinnd readers>, C<ctlinnd reject>, C<ctlinnd reserve>,
+C<ctlinnd throttle> and C<nnrpd -r> commands now require the given reason
+to be encoded in UTF-8, so that it can be properly sent to news readers.
+
+=item *
+
The output of consistency checks for article storage and the F<history>
file no longer appears by default when C<cnfsstat -a> is used. A new B<-v>
flag has been added to B<cnfsstat> so as to see it.
Modified: include/inn/libinn.h
===================================================================
--- include/inn/libinn.h 2009-09-28 19:57:30 UTC (rev 8636)
+++ include/inn/libinn.h 2009-09-28 19:59:01 UTC (rev 8637)
@@ -107,6 +107,7 @@
UWILDMAT_POISON
};
+extern bool is_valid_utf8(const char *start);
extern bool uwildmat(const char *text, const char *pat);
extern bool uwildmat_simple(const char *text, const char *pat);
extern enum uwildmat uwildmat_poison(const char *text, const char *pat);
Modified: innd/cc.c
===================================================================
--- innd/cc.c 2009-09-28 19:57:30 UTC (rev 8636)
+++ innd/cc.c 2009-09-28 19:59:01 UTC (rev 8637)
@@ -275,7 +275,7 @@
/*
-** Do the work to allow foreign connectiosn.
+** Do the work to allow foreign connections.
*/
static const char *
CCallow(char *av[])
@@ -1001,6 +1001,13 @@
if (CTYPE(isupper, Rest[0]))
Rest[0] = tolower(Rest[0]);
}
+
+ who = av[2];
+ if (*who == '\0')
+ who = NEWSMASTER;
+ if (!is_valid_utf8(who))
+ return "1 Invalid UTF-8 creator's name";
+
if (strlen(Name) + strlen(Rest) > SMBUF - 24)
return "1 Name too long";
@@ -1018,9 +1025,6 @@
IOError(WHEN, oerrno);
}
else {
- who = av[2];
- if (*who == '\0')
- who = NEWSMASTER;
xasprintf(&buff, "%s %ld %s\n", Name, (long) Now.tv_sec, who);
if (xwrite(fd, buff, strlen(buff)) < 0) {
oerrno = errno;
@@ -1145,9 +1149,12 @@
if (*reason == '\0')
return CCnoreason;
- if (strlen(reason) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe */
+ if (strlen(reason) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe. */
return CCbigreason;
+ if (!is_valid_utf8(reason))
+ return "1 Invalid UTF-8 reason";
+
if (Reservation) {
if (strcmp(reason, Reservation) != 0) {
snprintf(CCreply.data, CCreply.size, "1 Reserved \"%s\"",
@@ -1228,8 +1235,10 @@
p = av[1];
if (*p == '\0')
return CCnoreason;
- if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe */
+ if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe. */
return CCbigreason;
+ if (!is_valid_utf8(p))
+ return "1 Invalid UTF-8 reason";
NNRPReason = xstrdup(p);
break;
}
@@ -1286,8 +1295,10 @@
{
if (RejectReason)
return "1 Already rejecting";
- if (strlen(av[0]) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe */
+ if (strlen(av[0]) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe. */
return CCbigreason;
+ if (!is_valid_utf8(av[0]))
+ return "1 Invalid UTF-8 reason";
RejectReason = xstrdup(av[0]);
return NULL;
}
@@ -1452,8 +1463,10 @@
/* Trying to make a reservation. */
if (Reservation)
return "1 Already reserved";
- if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe */
+ if (strlen(p) > MAX_REASON_LEN) /* MAX_REASON_LEN is as big as is safe. */
return CCbigreason;
+ if (!is_valid_utf8(p))
+ return "1 Invalid UTF-8 reason";
Reservation = xstrdup(p);
}
else {
Modified: lib/uwildmat.c
===================================================================
--- lib/uwildmat.c 2009-09-28 19:57:30 UTC (rev 8636)
+++ lib/uwildmat.c 2009-09-28 19:59:01 UTC (rev 8637)
@@ -93,7 +93,6 @@
if (end != NULL && (end - start + 1) < length)
return 1;
left = length - 1;
- p = start + 1;
for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
left--;
return (left == 0) ? length : 1;
@@ -101,6 +100,48 @@
/*
+** Check whether a string contains only valid UTF-8 characters.
+*/
+bool
+is_valid_utf8(const char *text)
+{
+ unsigned char mask;
+ const unsigned char *p;
+ int length;
+ int left;
+
+ for (p = (const unsigned char *)text; *p != '\0';) {
+ mask = 0x80;
+ length = 0;
+
+ /* Find out the expected length of the character. */
+ for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
+ length++;
+
+ p++;
+
+ /* Valid ASCII. */
+ if (length == 0)
+ continue;
+
+ /* Invalid length. */
+ if (length < 2 || length > 6)
+ return false;
+
+ /* Check that each byte looks like 10xxxxxx, except for the first. */
+ left = length - 1;
+ for (; left > 0 && (*p & 0xc0) == 0x80; p++)
+ left--;
+
+ if (left > 0)
+ return false;
+ }
+
+ return true;
+}
+
+
+/*
** Convert a UTF-8 character to UCS-4. Takes a pointer to the start of the
** character and to the last octet of the string, and to a uint32_t into
** which to put the decoded UCS-4 value. If end is NULL, expect the string
Modified: nnrpd/nnrpd.c
===================================================================
--- nnrpd/nnrpd.c 2009-09-28 19:57:30 UTC (rev 8636)
+++ nnrpd/nnrpd.c 2009-09-28 19:59:01 UTC (rev 8637)
@@ -1225,7 +1225,8 @@
/* Were we told to reject connections? */
if (Reject) {
syslog(L_NOTICE, "%s rejected %s", Client.host, Reject);
- Reply("%d %s\r\n", NNTP_FAIL_TERMINATING, Reject);
+ Reply("%d %s\r\n", NNTP_FAIL_TERMINATING,
+ is_valid_utf8(Reject) ? Reject : "Connection rejected");
ExitWithStats(0, false);
}
Modified: tests/lib/uwildmat-t.c
===================================================================
--- tests/lib/uwildmat-t.c 2009-09-28 19:57:30 UTC (rev 8636)
+++ tests/lib/uwildmat-t.c 2009-09-28 19:59:01 UTC (rev 8637)
@@ -44,10 +44,21 @@
printf(" %s\n %s\n expected %d\n", text, pattern, matches);
}
+static void
+test_v(int n, const char *text, bool matches)
+{
+ bool matched;
+
+ matched = is_valid_utf8(text);
+ printf("%sok %d\n", matched == matches ? "" : "not ", n);
+ if (matched != matches)
+ printf(" %s\n expected %d\n", text, matches);
+}
+
int
main(void)
{
- test_init(174);
+ test_init(187);
/* Basic wildmat features. */
test_r( 1, "foo", "foo", true);
@@ -249,5 +260,22 @@
test_r(174, "\303\206\357\277\277",
"*[^\303\206]", true);
+ /* Tests for the is_valid_utf8 interface. */
+ test_v(175, "a", true);
+ test_v(176, "aaabbb", true);
+ test_v(177, "test\303\251\302\240!", true);
+ test_v(178, "\200", false);
+ test_v(179, "\277", false);
+ test_v(180, "\300 ", false);
+ test_v(181, "\340\277", false);
+ test_v(182, "\374\277\277\277\277", false);
+ test_v(183, "\374\277\277\277\277\277", true);
+ test_v(184, "a\303\251b\303\251c\374\277\277\277\277\277",
+ true);
+ test_v(185, "a\303\251b\303c\374\277\277\277\277\277",
+ false);
+ test_v(186, "", true);
+ test_v(187, "a\303\251b\303\0c", false);
+
return 0;
}
More information about the inn-committers
mailing list