INN commit: trunk (5 files)
INN Commit
rra at isc.org
Mon Feb 4 14:21:58 UTC 2019
Date: Monday, February 4, 2019 @ 06:21:58
Author: iulius
Revision: 10325
Allow the use of UTF-8 in header fields again
This fixes a regression present since INN 2.6.1: posts with
internationalized header fields (UTF-8) are now accepted again.
Modified:
trunk/doc/pod/news.pod
trunk/lib/headers.c
trunk/lib/uwildmat.c
trunk/tests/lib/headers-t.c
trunk/tests/lib/uwildmat-t.c
------------------------+
doc/pod/news.pod | 6 ++++++
lib/headers.c | 21 +++++++++++++--------
lib/uwildmat.c | 21 +++++++++++++++------
tests/lib/headers-t.c | 11 ++++++++++-
tests/lib/uwildmat-t.c | 3 ++-
5 files changed, 46 insertions(+), 16 deletions(-)
Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod 2018-12-29 13:49:20 UTC (rev 10324)
+++ doc/pod/news.pod 2019-02-04 14:21:58 UTC (rev 10325)
@@ -20,6 +20,12 @@
=item *
+Fixed a regression since S<INN 2.6.1> that prevented articles with
+internationalized header fields (that is to say encoded in UTF-8)
+from being posted.
+
+=item *
+
Support for S<Python 3> has been added to INN. Embedded Python filtering
and authentication hooks for B<innd> and B<nnrpd> can now use S<version
3.3.0> or later of the Python interpreter. In the 2.x series, S<version
Modified: lib/headers.c
===================================================================
--- lib/headers.c 2018-12-29 13:49:20 UTC (rev 10324)
+++ lib/headers.c 2019-02-04 14:21:58 UTC (rev 10325)
@@ -40,6 +40,7 @@
/*
** Check whether the argument is a valid header field body. It starts
** after the space following the header field name and its colon.
+** Internationalized header fields encoded in UTF-8 are allowed.
**
** We currently assume the maximal line length has already been checked.
*/
@@ -52,13 +53,11 @@
if (p == NULL || *p == '\0')
return false;
+ if (!is_valid_utf8(p))
+ return false;
+
for (; *p != '\0'; p++) {
- if (isgraph((unsigned char) *p)) {
- /* Current header content line contains a (non-whitespace)
- * printable char. */
- emptycontentline = false;
- continue;
- } else if (ISWHITE(*p)) {
+ if (ISWHITE(*p)) {
/* Skip SP and TAB. */
continue;
} else if (*p == '\n' || (*p == '\r' && *++p == '\n')) {
@@ -75,9 +74,15 @@
* re-initialize emptycontentline to true. */
emptycontentline = true;
continue;
+ } else if (p[-1] == '\r') {
+ /* Case of CR not followed by LF (handled at the previous
+ * if statement). */
+ return false;
} else {
- /* Invalid character found. */
- return false;
+ /* Current header content line contains a (non-whitespace)
+ * character. */
+ emptycontentline = false;
+ continue;
}
}
Modified: lib/uwildmat.c
===================================================================
--- lib/uwildmat.c 2018-12-29 13:49:20 UTC (rev 10324)
+++ lib/uwildmat.c 2019-02-04 14:21:58 UTC (rev 10325)
@@ -63,6 +63,8 @@
#include "config.h"
#include "clibrary.h"
+#include <ctype.h>
+
#include "inn/libinn.h"
#define ABORT -1
@@ -100,7 +102,8 @@
/*
-** Check whether a string contains only valid UTF-8 characters.
+** Check whether a string contains only valid UTF-8 characters, without
+** any ASCII control characters except for \r, \n and \t.
*/
bool
is_valid_utf8(const char *text)
@@ -120,10 +123,16 @@
p++;
- /* Valid ASCII. */
- if (length == 0)
- continue;
-
+ /* Valid printable ASCII character or CR, LF or HTAB. */
+ if (length == 0) {
+ if(isprint((unsigned char) p[-1])
+ || p[-1] == '\r' || p[-1] == '\n' || p[-1] == '\t') {
+ continue;
+ } else {
+ return false;
+ }
+ }
+
/* Invalid length. */
if (length < 2 || length > 6)
return false;
@@ -350,7 +359,7 @@
return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
end = start + strlen((const char *) start) - 1;
- /* Main match loop. Find each comma that separates patterns, and attempt
+ /* Main match loop. Find each comma that separates patterns, and attempt
to match the text with each pattern in order. The last matching
pattern determines whether the whole expression matches. */
for (; p <= end + 1; p = split + 1) {
Modified: tests/lib/headers-t.c
===================================================================
--- tests/lib/headers-t.c 2018-12-29 13:49:20 UTC (rev 10324)
+++ tests/lib/headers-t.c 2019-02-04 14:21:58 UTC (rev 10325)
@@ -12,7 +12,7 @@
int
main(void)
{
- plan(9+3+9+7+12+5);
+ plan(9+3+11+8+14+7);
ok(!IsValidHeaderName(NULL), "bad header name 1");
ok(!IsValidHeaderName(""), "bad header name 2");
@@ -38,6 +38,8 @@
ok(!IsValidHeaderBody("\r\n b"), "bad header body 7");
ok(!IsValidHeaderBody("a\r\n b\r\n"), "bad header body 8");
ok(!IsValidHeaderBody("a\n\tb\n \t\n c"), "bad header body 9");
+ ok(!IsValidHeaderBody("a\003b"), "bad header body 10");
+ ok(!IsValidHeaderBody("a\r b"), "bad header body 11");
ok(IsValidHeaderBody(":"), "good header body 1");
ok(IsValidHeaderBody("a b"), "good header body 2");
@@ -46,6 +48,7 @@
ok(IsValidHeaderBody("a\r\n\tb"), "good header body 5");
ok(IsValidHeaderBody("a\n b"), "good header body 6");
ok(IsValidHeaderBody("a\n\tb\n \tc\n d"), "good header body 7");
+ ok(IsValidHeaderBody("\317\205\317\204\317\2068"), "good header body 8");
ok(!IsValidHeaderField(NULL), "bad header field 1");
ok(!IsValidHeaderField(""), "bad header field 2");
@@ -59,6 +62,8 @@
ok(!IsValidHeaderField("\177Subject: a"), "bad header field 10");
ok(!IsValidHeaderField("Subject: a\177b"), "bad header field 11");
ok(!IsValidHeaderField("Subject: a\nb"), "bad header field 12");
+ ok(!IsValidHeaderField("UT\317\2068: a"), "bad header field 13");
+ ok(!IsValidHeaderField("Control\004: a"), "bad header field 14");
ok(IsValidHeaderField("Subject: a"), "good header field 1");
ok(IsValidHeaderField("Subject: a\n\tb"), "good header field 2");
@@ -65,6 +70,10 @@
ok(IsValidHeaderField("Sub: ject"), "good header field 3");
ok(IsValidHeaderField("X-#%-T`?!: yeah"), "good header field 4");
ok(IsValidHeaderField("Subject: a\r\n\tb"), "good header field 5");
+ ok(IsValidHeaderField("Newsgroups: local.\317\205\317\204\317\2068"),
+ "good header field 6");
+ ok(IsValidHeaderField("Subject: \317\205\317\204\317\2068\r\n testing"),
+ "good header field 7");
return 0;
}
Modified: tests/lib/uwildmat-t.c
===================================================================
--- tests/lib/uwildmat-t.c 2018-12-29 13:49:20 UTC (rev 10324)
+++ tests/lib/uwildmat-t.c 2019-02-04 14:21:58 UTC (rev 10325)
@@ -58,7 +58,7 @@
int
main(void)
{
- test_init(187);
+ test_init(188);
/* Basic wildmat features. */
test_r( 1, "foo", "foo", true);
@@ -276,6 +276,7 @@
false);
test_v(186, "", true);
test_v(187, "a\303\251b\303\0c", false);
+ test_v(188, "two words", true);
return 0;
}
More information about the inn-committers
mailing list