inn-STABLE: innfeed eating 100% CPU time

"Miquel van Smoorenburg" list-inn-workers at news.cistron.nl
Mon Apr 21 16:08:53 UTC 2008


I moved one of my production boxes over to inn-STABLE about a week
ago. It appeared to be running just fine for a couple of days,
but today I ran 'top' and saw that my innfeed processes (I run 3 in
parallel) were using way too much CPU time.

An strace showed this:

select(95, [0 32 33 34 35 37 38 39 40 42 43 44 45 46 47 49 50 51 52 54 55 56 57 58 60 61 62 64 65 66 68 70 71 72 74 75 78 79 81 82 83 84 85 86 87 88 89 90 91 92 93 94], [31 34 35 36 39 40 41 45 46 48 51 52 53 56 57 59 61 62 63 66 69 71 72 73 75 77 79], [38 39 43 47], {0, 0}) = 1 (in [33], left {0, 0})
readv(33, [{"438 <part23of137.wdAJ02uyc3zyZuE"..., 256}], 1) = 62
select(95, [0 32 33 34 35 37 38 39 40 42 43 44 45 46 47 49 50 51 52 54 55 56 57 58 60 61 62 64 65 66 68 70 71 72 74 75 78 79 81 82 83 84 85 86 87 88 89 90 91 92 93 94], [31 34 35 36 39 40 41 45 46 48 51 52 53 56 57 59 61 62 63 66 69 71 72 73 75 77 79], [33 38 39 43 47], {0, 0}) = 0 (Timeout)
select(95, [0 32 33 34 35 37 38 39 40 42 43 44 45 46 47 49 50 51 52 54 55 56 57 58 60 61 62 64 65 66 68 70 71 72 74 75 78 79 81 82 83 84 85 86 87 88 89 90 91 92 93 94], [31 34 35 36 39 40 41 45 46 48 51 52 53 56 57 59 61 62 63 66 69 71 72 73 75 77 79], [33 38 39 43 47], {0, 0}) = 0 (Timeout)
select(95, [0 32 33 34 35 37 38 39 40 42 43 44 45 46 47 49 50 51 52 54 55 56 57 58 60 61 62 64 65 66 68 70 71 72 74 75 78 79 81 82 83 84 85 86 87 88 89 90 91 92 93 94], [31 34 35 36 39 40 41 45 46 48 51 52 53 56 57 59 61 62 63 66 69 71 72 73 75 77 79], [33 38 39 43 47], {0, 0}) = 5 (in [0 32 51], out [39 51], left {0, 0})

As you can see, select() is being called with a {0, 0} timeout, so it
returns immediately instead of blocking. Innfeed still works, but it
spins in a busy loop eating CPU.

There are some changes in STABLE's innfeed/connection.c relative to
2.4.3. I reverted those, and now the strace shows:

readv(51, [{"438 <part127of137.pUmCGOf2vyJK0s"..., 256}], 1) = 64
select(129, [0 35 36 37 38 40 41 42 43 44 45 46 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 66 67 68 69 71 72 73 74 76 77 78 79 80 81 82 83 84 86 87 88 89 91 95 96 97 98 99 101 103 104 105 108 109 110 111 113 114 115 117 118 119 120 121 122 123 124 125 126 127 128], [], [51], {1, 0}) = 1 (in [43], left {1, 0})
readv(43, [{"438 <part127of137.pUmCGOf2vyJK0s"..., 256}], 1) = 64
select(129, [0 35 36 37 38 40 41 42 43 44 45 46 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 66 67 68 69 71 72 73 74 76 77 78 79 80 81 82 83 84 86 87 88 89 91 95 96 97 98 99 101 103 104 105 108 109 110 111 113 114 115 117 118 119 120 121 122 123 124 125 126 127 128], [], [43 51], {1, 0}) = 1 (in [40], left {1, 0})
readv(40, [{"239 <F6GdnWRqDJ2fJ5HVnZ2dnUVZ_r7"..., 256}], 1) = 53
select(129, [0 35 36 37 38 40 41 42 43 44 45 46 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 66 67 68 69 71 72 73 74 76 77 78 79 80 81 82 83 84 86 87 88 89 91 95 96 97 98 99 101 103 104 105 108 109 110 111 113 114 115 117 118 119 120 121 122 123 124 125 126 127 128], [], [40 43 51], {1, 0}) = 1 (in [42], left {1, 0})
readv(42, [{"438 <part127of137.pUmCGOf2vyJK0s"..., 256}], 1) = 64


Here's what I reverted. Unless someone can find out why this is
happening, I think this change should be reverted (patch -R) in -STABLE
before 2.4.4.


diff -ruN inn-2.4.3/innfeed/connection.c inn-STABLE-20080414/innfeed/connection.c
--- inn-2.4.3/innfeed/connection.c	2006-03-20 05:14:57.000000000 +0100
+++ inn-STABLE-20080414/innfeed/connection.c	2008-04-14 11:08:15.000000000 +0200
@@ -2329,9 +2329,15 @@
         cxnSleep (cxn) ;
     }
   else
-    /* The article has been sent, so start the response timer. */
-    initReadBlockedTimeout (cxn) ;
-
+    {
+      /* Some hosts return a response even before we're done sending, so don't
+         go idle until here. */
+      if (cxn->state == cxnFeedingS && cxn->articleQTotal == 0)
+        cxnIdle (cxn) ;
+      else
+        /* The command set has been sent, so start the response timer. */
+        initReadBlockedTimeout (cxn) ;
+    }
 
   freeBufferArray (b) ;
 
@@ -2940,7 +2951,7 @@
   else
     {
       remArtHolder (artHolder, &cxn->checkRespHead, &cxn->articleQTotal) ;
-      if (cxn->articleQTotal == 0)
+      if (cxn->articleQTotal == 0 && !writeIsPending (cxn->myEp))
         cxnIdle (cxn) ;
       hostArticleDeferred (cxn->myHost, cxn, artHolder->article) ;
       delArtHolder (artHolder) ;
@@ -3001,7 +3012,7 @@
       cxn->checksRefused++ ;
 
       remArtHolder (artHolder, &cxn->checkRespHead, &cxn->articleQTotal) ;
-      if (cxn->articleQTotal == 0)
+      if (cxn->articleQTotal == 0 && !writeIsPending (cxn->myEp))
         cxnIdle (cxn) ;
       hostArticleNotWanted (cxn->myHost, cxn, artHolder->article);
       delArtHolder (artHolder) ;
@@ -3062,7 +3073,7 @@
       cxn->takesSizeOkayed += artSize(artHolder->article);
 
       remArtHolder (artHolder, &cxn->takeRespHead, &cxn->articleQTotal) ;
-      if (cxn->articleQTotal == 0)
+      if (cxn->articleQTotal == 0 && !writeIsPending (cxn->myEp))
         cxnIdle (cxn) ;
       hostArticleAccepted (cxn->myHost, cxn, artHolder->article) ;
       delArtHolder (artHolder) ;
@@ -3156,7 +3167,7 @@
             {
               cxn->checksRefused++ ;
               remArtHolder (artHolder, &cxn->checkRespHead, &cxn->articleQTotal) ;
-              if (cxn->articleQTotal == 0)
+              if (cxn->articleQTotal == 0 && !writeIsPending (cxn->myEp))
                 cxnIdle (cxn) ;
               hostArticleNotWanted (cxn->myHost, cxn, artHolder->article);
               delArtHolder (artHolder) ;
@@ -3237,7 +3247,7 @@
       cxn->takesOkayed++ ;
       cxn->takesSizeOkayed += artSize(artHolder->article);
       
-      if (cxn->articleQTotal == 0)
+      if (cxn->articleQTotal == 0 && !writeIsPending (cxn->myEp))
         cxnIdle (cxn) ;
 
       hostArticleAccepted (cxn->myHost, cxn, artHolder->article) ;
@@ -3917,7 +3927,10 @@
 
   /* If there's a write pending we can't do anything now. */
   if ( writeIsPending (cxn->myEp) )
-    return ;
+    {
+      addWorkCallback (cxn->myEp,cxnWorkProc,cxn) ;
+      return ;
+    }
   else if ( writesNeeded (cxn) ) /* something on a queue. */
     {
       if (cxn->doesStreaming)
@@ -4157,7 +4170,7 @@
      was a big backlog of missing articles *and* we're running in
      no-CHECK mode, then the Host would be putting bad articles on the
      queue we're taking them off of. */
-  if (cxn->missing && cxn->articleQTotal == 0)
+  if (cxn->missing && cxn->articleQTotal == 0 && !writeIsPending (cxn->myEp))
     cxnIdle (cxn) ;
   for (p = cxn->missing ; p != NULL ; p = q)
     {
-- 
The From: and Reply-To: addresses are internal news2mail gateway addresses.
Reply to the list or to "Miquel van Smoorenburg" <miquels at cistron.nl>


More information about the inn-workers mailing list