BIND 10 master, updated. 95a03bbefb559615f3f6e529d408b749964d390a [master] Merge branch 'trac988'

BIND 10 source code commits bind10-changes at lists.isc.org
Tue Jun 19 00:09:02 UTC 2012


The branch, master has been updated
       via  95a03bbefb559615f3f6e529d408b749964d390a (commit)
       via  8e1c45388cdecec2658bc3d8ba69efd728f9806c (commit)
       via  9a24a3143e6eb00ca6e242cccef6b76306c96761 (commit)
       via  28603760451a1f515e68fb196ec5f3ad007edbc5 (commit)
      from  5f1c1f8ccf2e0af14706f963b4d764fd308a1bdd (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 95a03bbefb559615f3f6e529d408b749964d390a
Merge: 5f1c1f8 8e1c453
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Mon Jun 18 17:05:24 2012 -0700

    [master] Merge branch 'trac988'

-----------------------------------------------------------------------

Summary of changes:
 src/bin/xfrout/tests/xfrout_test.py.in |   48 ++++++++++++++++++++++++++++++
 src/bin/xfrout/xfrout.py.in            |   51 +++++++++++++++++++++-----------
 src/bin/xfrout/xfrout_messages.mes     |   27 ++++++++++++-----
 3 files changed, 101 insertions(+), 25 deletions(-)

-----------------------------------------------------------------------
diff --git a/src/bin/xfrout/tests/xfrout_test.py.in b/src/bin/xfrout/tests/xfrout_test.py.in
index 67d68c3..e4fc873 100644
--- a/src/bin/xfrout/tests/xfrout_test.py.in
+++ b/src/bin/xfrout/tests/xfrout_test.py.in
@@ -60,6 +60,9 @@ class MySocket():
         self.sendqueue.extend(data);
         return len(data)
 
+    def fileno(self):
+        return 42               # simply return a constant dummy value
+
     def readsent(self):
         if len(self.sendqueue) >= 2:
             size = 2 + struct.unpack("!H", self.sendqueue[:2])[0]
@@ -1155,6 +1158,15 @@ class TestUnixSockServer(unittest.TestCase):
     def setUp(self):
         self.write_sock, self.read_sock = socket.socketpair()
         self.unix = MyUnixSockServer()
+        # Some test below modify these module-wide attributes.  We'll need
+        # to restore them at the end of each test, so we remember them here.
+        self.__select_bak = xfrout.select.select
+        self.__recv_fd_back = xfrout.recv_fd
+
+    def tearDown(self):
+        # Restore possibly faked module-wide attributes.
+        xfrout.select.select = self.__select_bak
+        xfrout.recv_fd = self.__recv_fd_back
 
     def test_tsig_keyring(self):
         """
@@ -1410,6 +1422,42 @@ class TestUnixSockServer(unittest.TestCase):
         sys.stdout = old_stdout
         os.rmdir(dir_name)
 
+    def __fake_select(self, r, w, e):
+        '''select emulator used in select_loop_fail test.'''
+        # This simplified faked function assumes to be called at most once,
+        # and in that case just return a pre-configured "readable" sockets.
+        if self.__select_count > 0:
+            raise RuntimeError('select called unexpected number of times')
+        self.__select_count += 1
+        return (self.__select_return_redable, [], [])
+
+    def test_select_loop_fail(self):
+        '''Check failure events in the main loop.'''
+        # setup faked select() environments
+        self.unix._read_sock = MySocket(socket.AF_INET6, socket.SOCK_STREAM)
+        xfrout.select.select = self.__fake_select
+        self.__select_return_redable = [MySocket(socket.AF_INET6,
+                                                 socket.SOCK_STREAM)]
+
+        # Check that loop terminates if recv_fd() fails.
+        for ret_code in [-1, FD_SYSTEM_ERROR]:
+            # fake recv_fd so it returns the faked failure code.
+            xfrout.recv_fd = lambda fileno: ret_code
+
+            # reset the counter, go to the loop.
+            self.__select_count = 0
+            self.unix._select_loop(self.__select_return_redable[0])
+            # select should have been called exactly once.
+            self.assertEqual(1, self.__select_count)
+
+        # Next, we test the case where recf_fd succeeds but receiving the
+        # request fails.
+        self.__select_count = 0
+        xfrout.recv_fd = lambda fileno: 1
+        self.unix._receive_query_message = lambda fd: None
+        self.unix._select_loop(self.__select_return_redable[0])
+        self.assertEqual(1, self.__select_count)
+
 class TestInitialization(unittest.TestCase):
     def setEnv(self, name, value):
         if value is None:
diff --git a/src/bin/xfrout/xfrout.py.in b/src/bin/xfrout/xfrout.py.in
index 73055fe..d0c54f8 100755
--- a/src/bin/xfrout/xfrout.py.in
+++ b/src/bin/xfrout/xfrout.py.in
@@ -678,30 +678,40 @@ class UnixSockServer(socketserver_mixin.NoPollMixIn,
         except socket.error:
             logger.error(XFROUT_FETCH_REQUEST_ERROR)
             return
+        self._select_loop(request)
+
+    def _select_loop(self, request_sock):
+        '''Main loop for a single session between xfrout and auth.
+
+        This is a dedicated subroutine of handle_request(), but is defined
+        as a separate "protected" method for the convenience of tests.
+        '''
 
         # Check self._shutdown_event to ensure the real shutdown comes.
         # Linux could trigger a spurious readable event on the _read_sock
         # due to a bug, so we need perform a double check.
         while not self._shutdown_event.is_set(): # Check if xfrout is shutdown
             try:
-                (rlist, wlist, xlist) = select.select([self._read_sock, request], [], [])
+                (rlist, wlist, xlist) = select.select([self._read_sock,
+                                                       request_sock], [], [])
             except select.error as e:
                 if e.args[0] == errno.EINTR:
                     (rlist, wlist, xlist) = ([], [], [])
                     continue
                 else:
-                    logger.error(XFROUT_SOCKET_SELECT_ERROR, str(e))
+                    logger.error(XFROUT_SOCKET_SELECT_ERROR, e)
                     break
 
-            # self.server._shutdown_event will be set by now, if it is not a false
-            # alarm
+            # self.server._shutdown_event will be set by now, if it is not a
+            # false alarm
             if self._read_sock in rlist:
                 continue
 
             try:
-                self.process_request(request)
+                if not self.process_request(request_sock):
+                    break
             except Exception as pre:
-                logger.error(XFROUT_PROCESS_REQUEST_ERROR, str(pre))
+                logger.error(XFROUT_PROCESS_REQUEST_ERROR, pre)
                 break
 
     def _handle_request_noblock(self):
@@ -713,26 +723,33 @@ class UnixSockServer(socketserver_mixin.NoPollMixIn,
 
     def process_request(self, request):
         """Receive socket fd and query message from auth, then
-        start a new thread to process the request."""
+        start a new thread to process the request.
+
+        Return: True if everything is okay; otherwise False, in which case
+        the calling thread will terminate.
+
+        """
         sock_fd = recv_fd(request.fileno())
         if sock_fd < 0:
-            # This may happen when one xfrout process try to connect to
-            # xfrout unix socket server, to check whether there is another
-            # xfrout running.
-            if sock_fd == FD_SYSTEM_ERROR:
-                logger.error(XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR)
-            return
+            logger.warn(XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR)
+            return False
 
-        # receive request msg
+        # receive request msg.  If it fails we simply terminate the thread;
+        # it might be possible to recover from this state, but it's more likely
+        # that auth and xfrout are in inconsistent states.  So it will make
+        # more sense to restart in a new session.
         request_data = self._receive_query_message(request)
-        if not request_data:
-            return
+        if request_data is None:
+            # The specific exception type doesn't matter so we use session
+            # error.
+            raise XfroutSessionError('Failed to get complete xfr request')
 
         t = threading.Thread(target=self.finish_request,
-                             args = (sock_fd, request_data))
+                             args=(sock_fd, request_data))
         if self.daemon_threads:
             t.daemon = True
         t.start()
+        return True
 
     def _guess_remote(self, sock_fd):
         """Guess remote address and port of the socket.
diff --git a/src/bin/xfrout/xfrout_messages.mes b/src/bin/xfrout/xfrout_messages.mes
index a636f78..9f674a2 100644
--- a/src/bin/xfrout/xfrout_messages.mes
+++ b/src/bin/xfrout/xfrout_messages.mes
@@ -115,11 +115,15 @@ In general, this should only occur for unexpected problems like
 memory allocation failures, as the query should already have been
 parsed by the b10-auth daemon, before it was passed here.
 
-% XFROUT_PROCESS_REQUEST_ERROR error processing transfer request: %2
-There was an error processing a transfer request. The error is included
-in the log message, but at this point no specific information other
-than that could be given. This points to incomplete exception handling
-in the code.
+% XFROUT_PROCESS_REQUEST_ERROR error processing transfer request: %1
+There was an error in receiving a transfer request from b10-auth.
+This is generally an unexpected event, but is possible when, for
+example, b10-auth terminates in the middle of forwarding the request.
+When this happens it's unlikely to be recoverable with the same
+communication session with b10-auth, so b10-xfrout drops it and
+waits for a new session.  In any case, this error indicates that
+there's something very wrong in the system, so it's advisable to check
+the over all status of the BIND 10 system.
 
 % XFROUT_QUERY_DROPPED %1 client %2: request to transfer %3 dropped
 The xfrout process silently dropped a request to transfer zone to
@@ -149,9 +153,16 @@ and will now shut down.
 
 % XFROUT_RECEIVE_FILE_DESCRIPTOR_ERROR error receiving the file descriptor for an XFR connection
 There was an error receiving the file descriptor for the transfer
-request. Normally, the request is received by b10-auth, and passed on
-to the xfrout daemon, so it can answer directly. However, there was a
-problem receiving this file descriptor. The request will be ignored.
+request from b10-auth.  There can be several reasons for this, but
+the most likely cause is that b10-auth terminates for some reason
+(maybe it's a bug of b10-auth, maybe it's an intentional restart by
+the administrator), so depending on how this happens it may or may not
+be a serious error.  But in any case this is not expected to happen
+frequently, and it's advisable to figure out how this happened if
+this message is logged.  Even if this error happens xfrout will reset
+its internal state and will keep receiving further requests.  So
+if it's just a temporary restart of b10-auth the administrator does
+not have to do anything.
 
 % XFROUT_REMOVE_OLD_UNIX_SOCKET_FILE_ERROR error removing unix socket file %1: %2
 The unix socket file xfrout needs for contact with the auth daemon



More information about the bind10-changes mailing list