BIND 10 trac2020, updated. 43feaa8658342eff0c2ddc9b33ef185a6826411e [2020] introduce waiting periods and retries if add_remote_config fails.

BIND 10 source code commits bind10-changes at lists.isc.org
Wed Jun 13 23:31:46 UTC 2012


The branch, trac2020 has been updated
       via  43feaa8658342eff0c2ddc9b33ef185a6826411e (commit)
       via  f5abe6318c5b84383cde4f74e79a6de0cfca87e4 (commit)
       via  5077b7bfbcd4e9ef7ce16cd3dca603e2582fd945 (commit)
      from  31408c8c110f7bbd95bbbea411cc8aee30eaf6b6 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 43feaa8658342eff0c2ddc9b33ef185a6826411e
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Jun 13 16:29:31 2012 -0700

    [2020] introduce waiting periods and retries if add_remote_config fails.
    
    also, make sure catching ModuleSpecError in addition to ModuleCCSessionError
    because that's actually raised when the speicified module is not running
    (seems buggy, but that happens in the real world).

commit f5abe6318c5b84383cde4f74e79a6de0cfca87e4
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Jun 13 14:22:49 2012 -0700

    [2020] updated comments about zonemgr config updates re. zone class and errors.

commit 5077b7bfbcd4e9ef7ce16cd3dca603e2582fd945
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Jun 13 13:07:01 2012 -0700

    [2020] fixed a typo in a comment

-----------------------------------------------------------------------

Summary of changes:
 src/bin/ddns/ddns.py.in         |   65 +++++++++++++++++++++++++++++----------
 src/bin/ddns/ddns_messages.mes  |   58 +++++++++++++++++++---------------
 src/bin/ddns/tests/ddns_test.py |   57 ++++++++++++++++++++++++++--------
 3 files changed, 126 insertions(+), 54 deletions(-)

-----------------------------------------------------------------------
diff --git a/src/bin/ddns/ddns.py.in b/src/bin/ddns/ddns.py.in
index 28ad4f7..b10edbb 100755
--- a/src/bin/ddns/ddns.py.in
+++ b/src/bin/ddns/ddns.py.in
@@ -25,6 +25,7 @@ import isc.ddns.session
 from isc.ddns.zone_config import ZoneConfig
 from isc.ddns.logger import ClientFormatter, ZoneFormatter
 from isc.config.ccsession import *
+from isc.config.module_spec import ModuleSpecError
 from isc.cc import SessionError, SessionTimeout, ProtocolError
 import isc.util.process
 import isc.util.cio.socketsession
@@ -34,6 +35,7 @@ from isc.server_common.dns_tcp import DNSTCPContext
 from isc.datasrc import DataSourceClient
 from isc.server_common.auth_command import auth_loadzone_command
 import select
+import time
 import errno
 
 from isc.log_messages.ddns_messages import *
@@ -151,6 +153,14 @@ def get_datasrc_client(cc_session):
                 raise isc.datasrc.Error(self.__ex)
         return (HARDCODED_DATASRC_CLASS, DummyDataSourceClient(ex), file)
 
+def add_pause(sec):
+    '''Pause a specified period for inter module synchronization.
+
+    This is a trivial wrraper of time.sleep, but defined as a separate function
+    so tests can customize it.
+    '''
+    time.sleep(sec)
+
 class DDNSServer:
     # The number of TCP clients that can be handled by the server at the same
     # time (this should be configurable parameter).
@@ -192,14 +202,9 @@ class DDNSServer:
         self._secondary_zones = None
 
         # Get necessary configurations from remote modules.
-        try:
-            mods = [(AUTH_MODULE_NAME, self.__auth_config_handler),
-                    (ZONEMGR_MODULE_NAME, self.__zonemgr_config_handler)]
-            for mod in mods:
-                self._cc.add_remote_config_by_name(mod[0], mod[1])
-        except ModuleCCSessionError as ex:
-            logger.error(DDNS_GET_REMOTE_CONFIG_FAIL, mod[0], ex)
-            raise ex    # propagte it and die for now
+        for mod in [(AUTH_MODULE_NAME, self.__auth_config_handler),
+                    (ZONEMGR_MODULE_NAME, self.__zonemgr_config_handler)]:
+            self.__add_remote_module(mod[0], mod[1])
         # This should succeed as long as cfgmgr is up.
         isc.server_common.tsig_keyring.init_keyring(self._cc)
 
@@ -274,6 +279,31 @@ class DDNSServer:
             answer = create_answer(1, "Unknown command: " + str(cmd))
         return answer
 
+    def __add_remote_module(self, mod_name, callback):
+        '''Register interest in other module's config with a callback.'''
+
+        # Due to startup timing, add_remote_config can fail.  We could make it
+        # more sophisticated, but for now we simply retry a few times, each
+        # separated by a short period (3 times and 1 sec, arbitrary chosen,
+        # and hardcoded for now).  In practice this should be more than
+        # sufficient, but if it turns out to be a bigger problem we can
+        # consider more elegant solutions.
+        for n_try in range(0, 3):
+            try:
+                # by_name() version can fail with ModuleSpecError in getting
+                # the module spec because cfgmgr returns a "successful" answer
+                # with empty data if it cannot find the specified module.
+                # This seems to be a deviant behavior (see Trac #2039), but
+                # we need to deal with it.
+                self._cc.add_remote_config_by_name(mod_name, callback)
+                return
+            except (ModuleSpecError, ModuleCCSessionError) as ex:
+                logger.warn(DDNS_GET_REMOTE_CONFIG_FAIL, mod_name, n_try + 1,
+                            ex)
+                last_ex = ex
+                add_pause(1)
+        raise last_ex
+
     def __auth_config_handler(self, new_config, module_config):
         logger.info(DDNS_RECEIVED_AUTH_UPDATE)
 
@@ -309,16 +339,17 @@ class DDNSServer:
         new_secondary_zones = set()
         try:
             # Parse the new config and build a new list of secondary zones.
-            # Note that validation should have been done by zonemgr, so
-            # the following shouldn't fail in theory.  But the configuration
-            # interface is quite complicated and there may be a hole, so
-            # we'll perform minimal defense ourselves.
+            # Unforutnately, in the current implementation, even an observer
+            # module needs to perform full validation.  This should be changed
+            # so that only post-validation (done by the main module) config is
+            # delevered to observer modules, but until it's supported we need
+            # to protect ourselves.
             for zone_spec in sec_zones:
                 zname = Name(zone_spec['name'])
-                # class is optional per spec.  ideally this should be merged
-                # within the config module, but it's not really clear if we
-                # can assume that due to its complexity - so we don't rely on
-                # it.
+                # class has the default value in case it's unspecified.
+                # ideally this should be merged within the config module, but
+                # the current implementation doesn't esnure that, so we need to
+                # subsitute it ourselves.
                 if 'class' in zone_spec:
                     zclass = RRClass(zone_spec['class'])
                 else:
@@ -678,7 +709,7 @@ def main(ddns_server=None):
         logger.info(DDNS_STOPPED_BY_KEYBOARD)
     except SessionError as e:
         logger.error(DDNS_CC_SESSION_ERROR, str(e))
-    except ModuleCCSessionError as e:
+    except (ModuleSpecError, ModuleCCSessionError) as e:
         logger.error(DDNS_MODULECC_SESSION_ERROR, str(e))
     except DDNSConfigError as e:
         logger.error(DDNS_CONFIG_ERROR, str(e))
diff --git a/src/bin/ddns/ddns_messages.mes b/src/bin/ddns/ddns_messages.mes
index de83487..61311bc 100644
--- a/src/bin/ddns/ddns_messages.mes
+++ b/src/bin/ddns/ddns_messages.mes
@@ -59,24 +59,28 @@ authoritative server shuts down, the connection would get closed. It also
 can mean the system is busy and can't keep up or that the other side got
 confused and sent bad data.
 
-% DDNS_GET_REMOTE_CONFIG_FAIL failed to get %1 module configuration: %2
+% DDNS_GET_REMOTE_CONFIG_FAIL failed to get %1 module configuration %2 times: %3
 b10-ddns tried to get configuration of some remote modules for its
 operation, but it failed.  The most likely cause of this is that the
 remote module has not fully started up and b10-ddns couldn't get the
-configuration in a timely fashion.  In the current implementation
-b10-ddns considers it a fatal error and terminates; however, as long
-as b10-ddns is configured as a "dispensable" component (which is the
-default), the parent bind10 process will restart it, and getting the
-remote configuration will eventually (and soon) succeed.  This is not
-the optimal behavior, but should be sufficient in practice.  If it
-really causes an operational trouble other than having a few of this
-log messages, please submit a bug report; there can be several ways to
-make it more sophisticated.  Another, less likely reason for having
-this error is because the remote modules are not actually configured
-to run.  If that's the case fixing the configuration should solve the
-problem - either by making sure the remote module will run or by not
-running b10-ddns (without these remote modules b10-ddns is not
-functional, so there's no point in running it in this case).
+configuration in a timely fashion.  b10-ddns attempts to retry it a
+few times, imposing a short delay, hoping it eventually succeeds if
+it's just a timing issue.  The number of total failed attempts is also
+logged.  If it reaches an internal threshold b10-ddns considers it a
+fatal error and terminates.  Even in that case, if b10-ddns is
+configured as a "dispensable" component (which is the default), the
+parent bind10 process will restart it, and there will be another
+chance of getting the remote configuration successfully.  These are
+not the optimal behavior, but it's believed to be sufficient in
+practice (there would normally be no failure in the first place).  If
+it really causes an operational trouble other than having a few of
+these log messages, please submit a bug report; there can be several
+ways to make it more sophisticated.  Another, less likely reason for
+having this error is because the remote modules are not actually
+configured to run.  If that's the case fixing the configuration should
+solve the problem - either by making sure the remote module will run
+or by not running b10-ddns (without these remote modules b10-ddns is
+not functional, so there's no point in running it in this case).
 
 % DDNS_MODULECC_SESSION_ERROR error encountered by configuration/command module: %1
 There was a problem in the lower level module handling configuration and
@@ -162,15 +166,21 @@ zones is logged.
 % DDNS_SECONDARY_ZONES_UPDATE_FAIL failed to update secondary zone list: %1
 An error message.  b10-ddns was notified of updates to a list of
 secondary zones from b10-zonemgr and tried to update its own internal
-copy of the list, but it failed.  This can happen only if the
-configuration contains an error, but such a configuration should have
-been rejected by b10-zonemgr first and shouldn't be delivered to
-b10-ddns, so this should basically be an internal bug.  It's advisable
-to submit a bug report if you ever see this message.  Also, while
-b10-ddns still keeps running with the previous configuration when this
-error happens, it's possible that the entire system is in an
-inconsistent state.  So it's probably better to restart bind10, or at
-least restart b10-ddns.
+copy of the list, but it failed.  This can happen if the configuration
+contains an error, and b10-zonemgr should also reject that update.
+Unfortunately, in the current implementation there is no way to ensure
+that both zonemgr and ddns have consistent information when an update
+contains an error; further, as of this writing zonemgr has a bug that
+it could partially update the list of secondary zones if part of the
+list has an error (see Trac ticket #2038).  b10-ddns still keeps
+running with the previous configuration, but it's strongly advisable
+to check log messages from zonemgr, and if it indicates there can be
+inconsistent state, it's better to restart the entire BIND 10 system
+(just restarting b10-ddns wouldn't be enough, because zonemgr can have
+partially updated configuration due to bug #2038).  The log message
+contains an error description, but it's intentionally kept simple as
+it's primarily a matter of zonemgr.  To know the details of the error,
+log messages of zonemgr should be consulted.
 
 % DDNS_SESSION session arrived on file descriptor %1
 A debug message, informing there's some activity on the given file descriptor.
diff --git a/src/bin/ddns/tests/ddns_test.py b/src/bin/ddns/tests/ddns_test.py
index 4468315..b0b34ca 100755
--- a/src/bin/ddns/tests/ddns_test.py
+++ b/src/bin/ddns/tests/ddns_test.py
@@ -24,6 +24,7 @@ from isc.datasrc import DataSourceClient
 from isc.config import module_spec_from_file
 from isc.config.config_data import ConfigData
 from isc.config.ccsession import create_answer, ModuleCCSessionError
+from isc.config.module_spec import ModuleSpecError
 from isc.server_common.dns_tcp import DNSTCPContext
 import ddns
 import errno
@@ -217,8 +218,8 @@ class MyCCSession(isc.config.ConfigData):
 
         # Attributes to handle (faked) remote configurations
         self.__callbacks = {}   # record callbacks for updates to remote confs
-        self._raise_mods = set()  # set of modules that triggers exception
-                                  # on add_remote.  settable by tests.
+        self._raise_mods = {}  # map of module to exceptions to be triggered
+                               # on add_remote.  settable by tests.
         self._auth_config = {}  # faked auth cfg, settable by tests
         self._zonemgr_config = {} # faked zonemgr cfg, settable by tests
 
@@ -237,8 +238,15 @@ class MyCCSession(isc.config.ConfigData):
         return FakeSocket(1)
 
     def add_remote_config_by_name(self, module_name, update_callback=None):
-        if module_name in self._raise_mods:
-            raise ModuleCCSessionError('Failure requesting remote config data')
+        # If a list of exceptions is given for the module, raise the front one,
+        # removing that exception from the list (so the list length controls
+        # how many (and which) exceptions should be raised on add_remote).
+        if module_name in self._raise_mods.keys() and \
+                len(self._raise_mods[module_name]) != 0:
+            ex = self._raise_mods[module_name][0]
+            self._raise_mods[module_name] = self._raise_mods[module_name][1:]
+            raise ex('Failure requesting remote config data')
+
         if update_callback is not None:
             self.__callbacks[module_name] = update_callback
         if module_name is 'Auth':
@@ -340,12 +348,15 @@ class TestDDNSServer(unittest.TestCase):
         self.__tcp_sock = FakeSocket(10, socket.IPPROTO_TCP)
         self.__tcp_ctx = DNSTCPContext(self.__tcp_sock)
         self.__tcp_data = b'A' * 12 # dummy, just the same size as DNS header
+        # some tests will override this, which will be restored in tearDown:
+        self.__orig_add_pause = ddns.add_pause
 
     def tearDown(self):
         ddns.select.select = select.select
         ddns.isc.util.cio.socketsession.SocketSessionReceiver = \
             isc.util.cio.socketsession.SocketSessionReceiver
         isc.server_common.tsig_keyring = self.orig_tsig_keyring
+        ddns.add_pause = self.__orig_add_pause
 
     def test_listen(self):
         '''
@@ -499,7 +510,8 @@ class TestDDNSServer(unittest.TestCase):
         self.assertEqual({(TEST_ZONE_NAME, TEST_RRCLASS)},
                          self.ddns_server._secondary_zones)
 
-        # Similar to the above, but the optional 'class' is missing.
+        # Similar to the above, but 'class' is unspecified.  The default value
+        # should be used.
         self.__cc_session._zonemgr_config = {'secondary_zones': [
                 {'name': TEST_ZONE_NAME_STR}]}
         self.__cc_session.add_remote_config_by_name('Zonemgr')
@@ -531,16 +543,35 @@ class TestDDNSServer(unittest.TestCase):
         self.assertEqual({(TEST_ZONE_NAME, TEST_RRCLASS)},
                          self.ddns_server._secondary_zones)
 
+    def __check_remote_config_fail(self, mod_name, num_ex, expected_ex):
+        '''Subroutine for remote_config_fail test.'''
+
+        # fake pause function for inspection and to avoid having timeouts
+        added_pause = []
+        ddns.add_pause = lambda sec: added_pause.append(sec)
+
+        # In our current implementation, there will be up to 3 tries of
+        # adding the module, each separated by a 1-sec pause.  If all attempts
+        # fail the exception will be propagated.
+        exceptions = [expected_ex for i in range(0, num_ex)]
+        self.__cc_session._raise_mods = {mod_name: exceptions}
+        if num_ex >= 3:
+            self.assertRaises(expected_ex, ddns.DDNSServer, self.__cc_session)
+        else:
+            ddns.DDNSServer(self.__cc_session)
+        self.assertEqual([1 for i in range(0, num_ex)], added_pause)
+
     def test_remote_config_fail(self):
         # If getting config of Auth or Zonemgr fails on construction of
-        # DDNServer, it should result in ModuleCCSessionError.
-        self.__cc_session._raise_mods.add('Auth')
-        self.assertRaises(ModuleCCSessionError, ddns.DDNSServer,
-                          self.__cc_session)
-
-        self.__cc_session._raise_mods = {'Zonemgr'}
-        self.assertRaises(ModuleCCSessionError, ddns.DDNSServer,
-                          self.__cc_session)
+        # DDNServer, it should result in an exception and a few times
+        # of retries.  We test all possible cases, changing the number of
+        # raised exceptions and the type of exceptions that can happen,
+        # which should also cover the fatal error case.
+        for i in range(0, 4):
+            self.__check_remote_config_fail('Auth', i, ModuleCCSessionError)
+            self.__check_remote_config_fail('Auth', i, ModuleSpecError)
+            self.__check_remote_config_fail('Zonemgr', i, ModuleCCSessionError)
+            self.__check_remote_config_fail('Zonemgr', i, ModuleSpecError)
 
     def test_shutdown_command(self):
         '''Test whether the shutdown command works'''



More information about the bind10-changes mailing list