BIND 10 master, updated. 405d85c8a0042ba807a3a123611ff383c4081ee1 [master] Merge branch 'trac1858' commit.

BIND 10 source code commits bind10-changes at lists.isc.org
Mon Oct 15 20:15:12 UTC 2012


The branch, master has been updated
       via  405d85c8a0042ba807a3a123611ff383c4081ee1 (commit)
       via  ae0565a4ed6e8a947120a7bfe826c1cf9d672760 (commit)
       via  2768eb3551fdaa2c4728563535dfb69f732dc033 (commit)
       via  9c655172a298766a37cc83173438f70fff83264c (commit)
       via  1e0c106c45251a61579d37a5148e9f258b3e5cea (commit)
       via  9e11459509c9223201dcbe10636b15ba923e1516 (commit)
       via  9dac9afb2569288bb630b00d2a89a490ad944710 (commit)
       via  c9de286421049dc1a75a981c511506d16c014501 (commit)
       via  92f2cc51478005befb016e410a4fcb9c768ea3b8 (commit)
      from  9565a7939e66bb0f3bbb4f3be8778dec13582db1 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 405d85c8a0042ba807a3a123611ff383c4081ee1
Merge: 9565a79 ae0565a
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Mon Oct 15 13:13:45 2012 -0700

    [master] Merge branch 'trac1858'
     commit.

-----------------------------------------------------------------------

Summary of changes:
 src/bin/bind10/bind10_messages.mes       |   31 ++++++++++++++----
 src/bin/bind10/bind10_src.py.in          |   52 +++++++++++++++++-------------
 src/bin/bind10/tests/bind10_test.py.in   |   36 +++++++++++++++++++--
 src/lib/python/isc/bind10/sockcreator.py |    5 +++
 4 files changed, 93 insertions(+), 31 deletions(-)

-----------------------------------------------------------------------
diff --git a/src/bin/bind10/bind10_messages.mes b/src/bin/bind10/bind10_messages.mes
index 2f48325..ed2a5d9 100644
--- a/src/bin/bind10/bind10_messages.mes
+++ b/src/bin/bind10/bind10_messages.mes
@@ -167,6 +167,30 @@ so BIND 10 will now shut down. The specific error is printed.
 % BIND10_SEND_SIGKILL sending SIGKILL to %1 (PID %2)
 The boss module is sending a SIGKILL signal to the given process.
 
+% BIND10_SEND_SIGNAL_FAIL sending %1 to %2 (PID %3) failed: %4
+The boss module sent a single (either SIGTERM or SIGKILL) to a process,
+but it failed due to some system level error.  There are two major cases:
+the target process has already terminated but the boss module had sent
+the signal before it noticed the termination.  In this case an error
+message should indicate something like "no such process".  This can be
+safely ignored.  The other case is that the boss module doesn't have
+the privilege to send a signal to the process.  It can typically
+happen when the boss module started as a privileged process, spawned a
+subprocess, and then dropped the privilege.  It includes the case for
+the socket creator when the boss process runs with the -u command line
+option.  In this case, the boss module simply gives up to terminate
+the process explicitly because it's unlikely to succeed by keeping
+sending the signal.  Although the socket creator is implemented so
+that it will terminate automatically when the boss process exits
+(and that should be the case for any other future process running with
+a higher privilege), but it's recommended to check if there's any
+remaining BIND 10 process if this message is logged.  For all other
+cases, the boss module will keep sending the signal until it confirms
+all child processes terminate.  Although unlikely, this could prevent
+the boss module from exiting, just keeping sending the signals.  So,
+again, it's advisable to check if it really terminates when this
+message is logged.
+
 % BIND10_SEND_SIGTERM sending SIGTERM to %1 (PID %2)
 The boss module is sending a SIGTERM signal to the given process.
 
@@ -274,13 +298,6 @@ During the startup process, a number of messages are exchanged between the
 Boss process and the processes it starts.  This error is output when a
 message received by the Boss process is not recognised.
 
-% BIND10_START_AS_NON_ROOT_AUTH starting b10-auth as a user, not root. This might fail.
-The authoritative server is being started or restarted without root privileges.
-If the module needs these privileges, it may have problems starting.
-Note that this issue should be resolved by the pending 'socket-creator'
-process; once that has been implemented, modules should not need root
-privileges anymore. See tickets #800 and #801 for more information.
-
 % BIND10_START_AS_NON_ROOT_RESOLVER starting b10-resolver as a user, not root. This might fail.
 The resolver is being started or restarted without root privileges.
 If the module needs these privileges, it may have problems starting.
diff --git a/src/bin/bind10/bind10_src.py.in b/src/bin/bind10/bind10_src.py.in
index 32c3152..45a2ccb 100755
--- a/src/bin/bind10/bind10_src.py.in
+++ b/src/bin/bind10/bind10_src.py.in
@@ -546,8 +546,6 @@ class BoB:
         """
             Start the Authoritative server
         """
-        if self.uid is not None and self.__started:
-            logger.warn(BIND10_START_AS_NON_ROOT_AUTH)
         authargs = ['b10-auth']
         if self.verbose:
             authargs += ['-v']
@@ -693,32 +691,42 @@ class BoB:
         # from doing so
         if not self.nokill:
             # next try sending a SIGTERM
-            components_to_stop = list(self.components.values())
-            for component in components_to_stop:
-                logger.info(BIND10_SEND_SIGTERM, component.name(), component.pid())
-                try:
-                    component.kill()
-                except OSError:
-                    # ignore these (usually ESRCH because the child
-                    # finally exited)
-                    pass
-            # finally, send SIGKILL (unmaskable termination) until everybody dies
+            self.__kill_children(False)
+            # finally, send SIGKILL (unmaskable termination) until everybody
+            # dies
             while self.components:
                 # XXX: some delay probably useful... how much is uncertain
                 time.sleep(0.1)
                 self.reap_children()
-                components_to_stop = list(self.components.values())
-                for component in components_to_stop:
-                    logger.info(BIND10_SEND_SIGKILL, component.name(),
-                                component.pid())
-                    try:
-                        component.kill(True)
-                    except OSError:
-                        # ignore these (usually ESRCH because the child
-                        # finally exited)
-                        pass
+                self.__kill_children(True)
             logger.info(BIND10_SHUTDOWN_COMPLETE)
 
+    def __kill_children(self, forceful):
+        '''Terminate remaining subprocesses by sending a signal.
+
+        The forceful paramter will be passed Component.kill().
+        This is a dedicated subroutine of shutdown(), just to unify two
+        similar cases.
+
+        '''
+        logmsg = BIND10_SEND_SIGKILL if forceful else BIND10_SEND_SIGTERM
+        # We need to make a copy of values as the components may be modified
+        # in the loop.
+        for component in list(self.components.values()):
+            logger.info(logmsg, component.name(), component.pid())
+            try:
+                component.kill(forceful)
+            except OSError as ex:
+                # If kill() failed due to EPERM, it doesn't make sense to
+                # keep trying, so we just log the fact and forget that
+                # component.  Ignore other OSErrors (usually ESRCH because
+                # the child finally exited)
+                signame = "SIGKILL" if forceful else "SIGTERM"
+                logger.info(BIND10_SEND_SIGNAL_FAIL, signame,
+                            component.name(), component.pid(), ex)
+                if ex.errno == errno.EPERM:
+                    del self.components[component.pid()]
+
     def _get_process_exit_status(self):
         return os.waitpid(-1, os.WNOHANG)
 
diff --git a/src/bin/bind10/tests/bind10_test.py.in b/src/bin/bind10/tests/bind10_test.py.in
index 0b11960..ece6370 100644
--- a/src/bin/bind10/tests/bind10_test.py.in
+++ b/src/bin/bind10/tests/bind10_test.py.in
@@ -1181,7 +1181,7 @@ class TestBossComponents(unittest.TestCase):
         # We check somewhere else that the shutdown is actually called
         # from there (the test_kills).
 
-    def __real_test_kill(self, nokill = False):
+    def __real_test_kill(self, nokill=False, ex_on_kill=None):
         """
         Helper function that does the actual kill functionality testing.
         """
@@ -1195,8 +1195,23 @@ class TestBossComponents(unittest.TestCase):
             (anyway it is not told so). It does not die if it is killed
             the first time. It dies only when killed forcefully.
             """
+            def __init__(self):
+                # number of kill() calls, preventing infinite loop.
+                self.__call_count = 0
+
             def kill(self, forceful=False):
+                self.__call_count += 1
+                if self.__call_count > 2:
+                    raise Exception('Too many calls to ImmortalComponent.kill')
+
                 killed.append(forceful)
+                if ex_on_kill is not None:
+                    # If exception is given by the test, raise it here.
+                    # In the case of ESRCH, the process should have gone
+                    # somehow, so we clear the components.
+                    if ex_on_kill.errno == errno.ESRCH:
+                        bob.components = {}
+                    raise ex_on_kill
                 if forceful:
                     bob.components = {}
             def pid(self):
@@ -1224,7 +1239,10 @@ class TestBossComponents(unittest.TestCase):
         if nokill:
             self.assertEqual([], killed)
         else:
-            self.assertEqual([False, True], killed)
+            if ex_on_kill is not None:
+                self.assertEqual([False], killed)
+            else:
+                self.assertEqual([False, True], killed)
 
         self.assertTrue(self.__called)
 
@@ -1236,6 +1254,20 @@ class TestBossComponents(unittest.TestCase):
         """
         self.__real_test_kill()
 
+    def test_kill_fail(self):
+        """Test cases where kill() results in an exception due to OS error.
+
+        The behavior should be different for EPERM, so we test two cases.
+
+        """
+
+        ex = OSError()
+        ex.errno, ex.strerror = errno.ESRCH, 'No such process'
+        self.__real_test_kill(ex_on_kill=ex)
+
+        ex.errno, ex.strerror = errno.EPERM, 'Operation not permitted'
+        self.__real_test_kill(ex_on_kill=ex)
+
     def test_nokill(self):
         """
         Test that the boss *doesn't* kill components which don't want to
diff --git a/src/lib/python/isc/bind10/sockcreator.py b/src/lib/python/isc/bind10/sockcreator.py
index c681d07..55142ee 100644
--- a/src/lib/python/isc/bind10/sockcreator.py
+++ b/src/lib/python/isc/bind10/sockcreator.py
@@ -214,9 +214,14 @@ class Creator(Parser):
                                 socket.SOCK_STREAM)
         env = copy.deepcopy(os.environ)
         env['PATH'] = path
+        # We explicitly set close_fs to True; it's False by default before
+        # Python 3.2.  If we don't close the remaining FDs, the copy of
+        # 'local' will prevent the child process from terminating when
+        # the parent process died abruptly.
         self.__process = subprocess.Popen(['b10-sockcreator'], env=env,
                                           stdin=remote.fileno(),
                                           stdout=remote2.fileno(),
+                                          close_fds=True,
                                           preexec_fn=self.__preexec_work)
         remote.close()
         remote2.close()



More information about the bind10-changes mailing list