INN commit: trunk (4 files)

INN Commit rra at isc.org
Fri Nov 6 18:11:02 UTC 2009


    Date: Friday, November 6, 2009 @ 10:11:01
  Author: iulius
Revision: 8708

* The keyword generation code now generates a Keywords: header only
if the original article does not already have one.

* Fixed a segfault caused by a pointer that could end up beyond
the end of the allocated Keywords: header.

* If the generated Keywords: header is empty, it is discarded
(because "Keywords: \r\n" is an invalid header).

* The generated Keywords: header no longer begins with a comma.

Modified:
  trunk/doc/pod/inn.conf.pod
  trunk/innd/art.c
  trunk/innd/innd.h
  trunk/innd/keywords.c

----------------------+
 doc/pod/inn.conf.pod |    7 +++++--
 innd/art.c           |   31 ++++++++++++++++++-------------
 innd/innd.h          |    3 +--
 innd/keywords.c      |   45 +++++++++++++++++++++------------------------
 4 files changed, 45 insertions(+), 41 deletions(-)

Modified: doc/pod/inn.conf.pod
===================================================================
--- doc/pod/inn.conf.pod	2009-11-04 22:04:08 UTC (rev 8707)
+++ doc/pod/inn.conf.pod	2009-11-06 18:11:01 UTC (rev 8708)
@@ -762,8 +762,8 @@
 
 INN has optional support for generating keyword information automatically
 from article body text and putting that information in overview for the
-use of clients that know to look for it.  The following parameters control
-that feature.
+use of clients that know to look for it (HDR, OVER and XPAT commands).
+The following parameters control that feature.
 
 This may be too slow if you're taking a substantial feed, and probably
 will not be useful for the average news reader; enabling this is not
@@ -777,6 +777,9 @@
 Whether the keyword generation support should be enabled.  This is a
 boolean value and the default is false.
 
+If an article already contains a Keywords: header, no keyword
+generation is done and the original Keywords: header is kept untouched.
+
 In order to use this feature, the regex library should be available
 and INN configured with the B<--enable-keywords> flag.  Otherwise,
 no keywords will be generated, even though this boolean value is set

Modified: innd/art.c
===================================================================
--- innd/art.c	2009-11-04 22:04:08 UTC (rev 8707)
+++ innd/art.c	2009-11-06 18:11:01 UTC (rev 8708)
@@ -1868,8 +1868,7 @@
   const ARTHEADER *hp;
   char		*p, *q;
   int		i, j, len;
-  char		*key_old_value = NULL;
-  int		key_old_length = 0;
+  bool          keywords_generated = false;
 
   if (ARTfields == NULL) {
     /* User error. */
@@ -1892,11 +1891,17 @@
        it into overview. */
     if (DO_KEYWORDS && innconf->keywords) {
       /* Ensure that there are Keywords: to shovel. */
-      if (hp == &ARTheaders[HDR__KEYWORDS]) {
-	key_old_value  = HDR(HDR__KEYWORDS);
-	key_old_length = HDR_LEN(HDR__KEYWORDS);
-	KEYgenerate(&hc[HDR__KEYWORDS], cp->In.data + data->Body,
-                    cp->Next - data->Body, key_old_value, key_old_length);
+      if (hp == &ARTheaders[HDR__KEYWORDS] && HDR(HDR__KEYWORDS) == NULL) {
+        keywords_generated = true;
+        KEYgenerate(&hc[HDR__KEYWORDS], cp->In.data + data->Body,
+                    cp->Next - data->Body);
+        /* Do not memorize an empty Keywords: header. */
+        if (HDR_LEN(HDR__KEYWORDS) == 0) {
+          if (HDR(HDR__KEYWORDS) != NULL)
+              free(HDR(HDR__KEYWORDS)); /* malloc'd within. */
+          HDR(HDR__KEYWORDS) = NULL;
+          keywords_generated = false;
+        }
       }
     }
 
@@ -1948,12 +1953,12 @@
 
     /* Patch the old keywords back in. */
     if (DO_KEYWORDS && innconf->keywords) {
-      if (key_old_value) {
-        if (hc[HDR__KEYWORDS].Value)
-          free(hc[HDR__KEYWORDS].Value); /* malloc'd within. */
-        hc[HDR__KEYWORDS].Value  = key_old_value;
-        hc[HDR__KEYWORDS].Length = key_old_length;
-        key_old_value = NULL;
+      if (keywords_generated) {
+        if (HDR(HDR__KEYWORDS) != NULL)
+          free(HDR(HDR__KEYWORDS)); /* malloc'd within. */
+        HDR(HDR__KEYWORDS) = NULL;
+        HDR_LEN(HDR__KEYWORDS) = 0;
+        keywords_generated = false;
       }
     }
   }

Modified: innd/innd.h
===================================================================
--- innd/innd.h	2009-11-04 22:04:08 UTC (rev 8707)
+++ innd/innd.h	2009-11-06 18:11:01 UTC (rev 8708)
@@ -747,8 +747,7 @@
 extern void		CCclose(void);
 extern void		CCsetup(void);
 
-extern void             KEYgenerate(HDRCONTENT *, const char *body, size_t,
-                                    const char *orig, size_t length);
+extern void             KEYgenerate(HDRCONTENT *, const char *body, size_t bodylen);
 
 extern void		LCclose(void);
 extern void		LCsetup(void);

Modified: innd/keywords.c
===================================================================
--- innd/keywords.c	2009-11-04 22:04:08 UTC (rev 8707)
+++ innd/keywords.c	2009-11-06 18:11:01 UTC (rev 8708)
@@ -95,11 +95,9 @@
 
 void
 KEYgenerate(
-    HDRCONTENT	*hc,		/* Header data. */
-    const char	*body,		/* Article body. */
-    size_t      bodylen,	/* Article body length. */
-    const char	*v,		/* Old Keywords: value. */
-    size_t	l)		/* Old Keywords: length. */
+    HDRCONTENT	*hc,            /* Header data. */
+    const char	*body,          /* Article body. */
+    size_t      bodylen)        /* Article body length. */
 {
 
     int		word_count, word_length, word_index, distinct_words;
@@ -134,16 +132,11 @@
 	}
     }
 
-    /* First re-init Keywords: from original value.  This is a mostly arbitrary
-     * cutoff leaving room for a minimal word vector. */
+    /* Initialize a fresh Keywords: value, limited to the size
+     * specified by the keylimit parameter in inn.conf. */
     hc->Value = xmalloc(innconf->keylimit + 1);
-    if ((v != NULL) && (*v != '\0')) {
-        if (l > (size_t) innconf->keylimit + 1)
-            l = innconf->keylimit + 1;
-        strlcpy(hc->Value, v, l);
-    } else
-        *hc->Value = '\0';
-    l = hc->Length = strlen(hc->Value);
+    *hc->Value = '\0';
+    hc->Length = 0;
 
     /* Now figure acceptable extents, and copy body to working string.
      * (Memory-intensive for hefty articles:  limit to non-ABSURD articles.) */
@@ -242,24 +235,28 @@
 	qsort(&word_vec[last], word_index - last,
 	      sizeof(struct word_entry), wvec_length_cmp);
 
-    /* Scribble onto end of Keywords: after a magic separator. */
-    strlcpy(hc->Value + l, ",\377", innconf->keylimit + 1 - l);
-    for (chase = hc->Value + l + 2, word_index = 0;
+    /* Write the Keywords: header. */
+    for (chase = hc->Value, word_index = 0;
 	 word_index < distinct_words;
 	 word_index++) {
-	
-        /* "noise" words don't count. */
+
+        /* "Noise" words don't count. */
 	if (regexec(&preg, word[word_vec[word_index].index], 0, NULL, 0) == 0)
 	    continue;
 
 	/* Add to list. */
-	*chase++ = ',';
-	strlcpy(chase, word[word_vec[word_index].index],
+        if (word_index != 0)
+            *chase++ = ',';
+
+        strlcpy(chase, word[word_vec[word_index].index],
                 innconf->keylimit + 1 - (chase - hc->Value));
-	chase += word_vec[word_index].length;
 
-	if (chase - hc->Value > (innconf->keylimit - (MAX_WORD_LENGTH + 4)))
-	    break;
+        /* Is there enough space to go on?  (comma + longest word) */
+        if (chase + word_vec[word_index].length - hc->Value >
+            (long) (innconf->keylimit - MAX_WORD_LENGTH - 1))
+            break;
+        else
+            chase += word_vec[word_index].length;
     }
 
     hc->Length = strlen(hc->Value);




More information about the inn-committers mailing list