INN commit: branches/2.5 (4 files)
INN Commit
rra at isc.org
Sun Nov 15 09:24:43 UTC 2009
Date: Sunday, November 15, 2009 @ 01:24:43
Author: iulius
Revision: 8780
* The keyword generation code now generates a Keywords: header only
if the original article does not already have one.
* Fixed a segfault caused by a pointer that could end up beyond
the allocated Keywords: header.
* If the generated Keywords: header is empty, it is discarded
(because "Keywords: \r\n" is an invalid header).
* The generated Keywords: header no longer begins with a comma.
Modified:
branches/2.5/doc/pod/inn.conf.pod
branches/2.5/innd/art.c
branches/2.5/innd/innd.h
branches/2.5/innd/keywords.c
----------------------+
doc/pod/inn.conf.pod | 7 +++++--
innd/art.c | 31 ++++++++++++++++++-------------
innd/innd.h | 3 +--
innd/keywords.c | 45 +++++++++++++++++++++------------------------
4 files changed, 45 insertions(+), 41 deletions(-)
Modified: doc/pod/inn.conf.pod
===================================================================
--- doc/pod/inn.conf.pod 2009-11-15 09:24:33 UTC (rev 8779)
+++ doc/pod/inn.conf.pod 2009-11-15 09:24:43 UTC (rev 8780)
@@ -762,8 +762,8 @@
INN has optional support for generating keyword information automatically
from article body text and putting that information in overview for the
-use of clients that know to look for it. The following parameters control
-that feature.
+use of clients that know to look for it (HDR, OVER and XPAT commands).
+The following parameters control that feature.
This may be too slow if you're taking a substantial feed, and probably
will not be useful for the average news reader; enabling this is not
@@ -777,6 +777,9 @@
Whether the keyword generation support should be enabled. This is a
boolean value and the default is false.
+If an article already contains a Keywords: header, no keyword
+generation is done and the original Keywords: header is kept untouched.
+
In order to use this feature, the regex library should be available
and INN configured with the B<--enable-keywords> flag. Otherwise,
no keywords will be generated, even though this boolean value is set
Modified: innd/art.c
===================================================================
--- innd/art.c 2009-11-15 09:24:33 UTC (rev 8779)
+++ innd/art.c 2009-11-15 09:24:43 UTC (rev 8780)
@@ -1868,8 +1868,7 @@
const ARTHEADER *hp;
char *p, *q;
int i, j, len;
- char *key_old_value = NULL;
- int key_old_length = 0;
+ bool keywords_generated = false;
if (ARTfields == NULL) {
/* User error. */
@@ -1892,11 +1891,17 @@
it into overview. */
if (DO_KEYWORDS && innconf->keywords) {
/* Ensure that there are Keywords: to shovel. */
- if (hp == &ARTheaders[HDR__KEYWORDS]) {
- key_old_value = HDR(HDR__KEYWORDS);
- key_old_length = HDR_LEN(HDR__KEYWORDS);
- KEYgenerate(&hc[HDR__KEYWORDS], cp->In.data + data->Body,
- cp->Next - data->Body, key_old_value, key_old_length);
+ if (hp == &ARTheaders[HDR__KEYWORDS] && HDR(HDR__KEYWORDS) == NULL) {
+ keywords_generated = true;
+ KEYgenerate(&hc[HDR__KEYWORDS], cp->In.data + data->Body,
+ cp->Next - data->Body);
+ /* Do not memorize an empty Keywords: header. */
+ if (HDR_LEN(HDR__KEYWORDS) == 0) {
+ if (HDR(HDR__KEYWORDS) != NULL)
+ free(HDR(HDR__KEYWORDS)); /* malloc'd within. */
+ HDR(HDR__KEYWORDS) = NULL;
+ keywords_generated = false;
+ }
}
}
@@ -1948,12 +1953,12 @@
/* Patch the old keywords back in. */
if (DO_KEYWORDS && innconf->keywords) {
- if (key_old_value) {
- if (hc[HDR__KEYWORDS].Value)
- free(hc[HDR__KEYWORDS].Value); /* malloc'd within. */
- hc[HDR__KEYWORDS].Value = key_old_value;
- hc[HDR__KEYWORDS].Length = key_old_length;
- key_old_value = NULL;
+ if (keywords_generated) {
+ if (HDR(HDR__KEYWORDS) != NULL)
+ free(HDR(HDR__KEYWORDS)); /* malloc'd within. */
+ HDR(HDR__KEYWORDS) = NULL;
+ HDR_LEN(HDR__KEYWORDS) = 0;
+ keywords_generated = false;
}
}
}
Modified: innd/innd.h
===================================================================
--- innd/innd.h 2009-11-15 09:24:33 UTC (rev 8779)
+++ innd/innd.h 2009-11-15 09:24:43 UTC (rev 8780)
@@ -747,8 +747,7 @@
extern void CCclose(void);
extern void CCsetup(void);
-extern void KEYgenerate(HDRCONTENT *, const char *body, size_t,
- const char *orig, size_t length);
+extern void KEYgenerate(HDRCONTENT *, const char *body, size_t bodylen);
extern void LCclose(void);
extern void LCsetup(void);
Modified: innd/keywords.c
===================================================================
--- innd/keywords.c 2009-11-15 09:24:33 UTC (rev 8779)
+++ innd/keywords.c 2009-11-15 09:24:43 UTC (rev 8780)
@@ -95,11 +95,9 @@
void
KEYgenerate(
- HDRCONTENT *hc, /* Header data. */
- const char *body, /* Article body. */
- size_t bodylen, /* Article body length. */
- const char *v, /* Old Keywords: value. */
- size_t l) /* Old Keywords: length. */
+ HDRCONTENT *hc, /* Header data. */
+ const char *body, /* Article body. */
+ size_t bodylen) /* Article body length. */
{
int word_count, word_length, word_index, distinct_words;
@@ -134,16 +132,11 @@
}
}
- /* First re-init Keywords: from original value. This is a mostly arbitrary
- * cutoff leaving room for a minimal word vector. */
+ /* Initialize a fresh Keywords: value, limited to the size
+ * specified by the keylimit parameter in inn.conf. */
hc->Value = xmalloc(innconf->keylimit + 1);
- if ((v != NULL) && (*v != '\0')) {
- if (l > (size_t) innconf->keylimit + 1)
- l = innconf->keylimit + 1;
- strlcpy(hc->Value, v, l);
- } else
- *hc->Value = '\0';
- l = hc->Length = strlen(hc->Value);
+ *hc->Value = '\0';
+ hc->Length = 0;
/* Now figure acceptable extents, and copy body to working string.
* (Memory-intensive for hefty articles: limit to non-ABSURD articles.) */
@@ -242,24 +235,28 @@
qsort(&word_vec[last], word_index - last,
sizeof(struct word_entry), wvec_length_cmp);
- /* Scribble onto end of Keywords: after a magic separator. */
- strlcpy(hc->Value + l, ",\377", innconf->keylimit + 1 - l);
- for (chase = hc->Value + l + 2, word_index = 0;
+ /* Write the Keywords: header. */
+ for (chase = hc->Value, word_index = 0;
word_index < distinct_words;
word_index++) {
-
- /* "noise" words don't count. */
+
+ /* "Noise" words don't count. */
if (regexec(&preg, word[word_vec[word_index].index], 0, NULL, 0) == 0)
continue;
/* Add to list. */
- *chase++ = ',';
- strlcpy(chase, word[word_vec[word_index].index],
+ if (word_index != 0)
+ *chase++ = ',';
+
+ strlcpy(chase, word[word_vec[word_index].index],
innconf->keylimit + 1 - (chase - hc->Value));
- chase += word_vec[word_index].length;
- if (chase - hc->Value > (innconf->keylimit - (MAX_WORD_LENGTH + 4)))
- break;
+ /* Is there enough space to go on? (comma + longest word) */
+ if (chase + word_vec[word_index].length - hc->Value >
+ (long) (innconf->keylimit - MAX_WORD_LENGTH - 1))
+ break;
+ else
+ chase += word_vec[word_index].length;
}
hc->Length = strlen(hc->Value);
More information about the inn-committers
mailing list