BIND 10 trac1028, updated. 1e9bb55e135af5a0d8dc353a2ffde7c5b247f92a [1028] final (unrelated) cleanups: removed now-unused functions.

BIND 10 source code commits bind10-changes at lists.isc.org
Thu Oct 27 06:57:17 UTC 2011


The branch, trac1028 has been updated
       via  1e9bb55e135af5a0d8dc353a2ffde7c5b247f92a (commit)
       via  738b11db9f13c00f5a9ddfb3ab9996fbf85c42d8 (commit)
       via  1fc79b932eaa88be33c224e4eea3fc58907e98bd (commit)
       via  8d36a0115d1b3051b88c9f9687103fa2427e749c (commit)
       via  65bd895a45fd28c43f748f07aad5fb9321fa6a0a (commit)
      from  a1e64504a4d039b4c7f7434451f169c475a1a35a (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 1e9bb55e135af5a0d8dc353a2ffde7c5b247f92a
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Oct 26 23:56:49 2011 -0700

    [1028] final (unrelated) cleanups: removed now-unused functions.

commit 738b11db9f13c00f5a9ddfb3ab9996fbf85c42d8
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Oct 26 23:55:10 2011 -0700

    [1028] prevented more unusual leaks when process_xfrin() raises an excpetion.
    also logged it when such an event happens.

commit 1fc79b932eaa88be33c224e4eea3fc58907e98bd
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Oct 26 16:07:01 2011 -0700

    [1028] removed self reference in XfrinConnection, which apparently caused
    reference leak.  To test this case, introduced a framework for testing
    process_xfrin().  This setup will be used for subseuent related fixes.

commit 8d36a0115d1b3051b88c9f9687103fa2427e749c
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Oct 26 14:38:59 2011 -0700

    [1028] cleanup: use a more appropriate variable name to specify s_ZoneUpdater.
    also fixed another possible (but quite unlikely) leak in case tp_alloc() fails.

commit 65bd895a45fd28c43f748f07aad5fb9321fa6a0a
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Wed Oct 26 14:37:03 2011 -0700

    [1028] fixed reference leak to DataSourceClient after get_updater.

-----------------------------------------------------------------------

Summary of changes:
 src/bin/xfrin/tests/xfrin_test.py                |  121 ++++++++++++++++-
 src/bin/xfrin/xfrin.py.in                        |  153 ++++++++++++----------
 src/bin/xfrin/xfrin_messages.mes                 |   15 ++
 src/lib/python/isc/datasrc/tests/datasrc_test.py |   18 +++
 src/lib/python/isc/datasrc/updater_python.cc     |   15 +-
 5 files changed, 240 insertions(+), 82 deletions(-)

-----------------------------------------------------------------------
diff --git a/src/bin/xfrin/tests/xfrin_test.py b/src/bin/xfrin/tests/xfrin_test.py
index 65bd968..401b4a7 100644
--- a/src/bin/xfrin/tests/xfrin_test.py
+++ b/src/bin/xfrin/tests/xfrin_test.py
@@ -16,6 +16,7 @@
 import unittest
 import shutil
 import socket
+import sys
 import io
 from isc.testutils.tsigctx_mock import MockTSIGContext
 from xfrin import *
@@ -216,8 +217,8 @@ class MockXfrin(Xfrin):
                                  request_type, check_soa)
 
 class MockXfrinConnection(XfrinConnection):
-    def __init__(self, sock_map, zone_name, rrclass, shutdown_event,
-                 master_addr):
+    def __init__(self, sock_map, zone_name, rrclass, datasrc_client,
+                 shutdown_event, master_addr, tsig_key=None):
         super().__init__(sock_map, zone_name, rrclass, MockDataSourceClient(),
                          shutdown_event, master_addr)
         self.query_data = b''
@@ -300,8 +301,9 @@ class TestXfrinState(unittest.TestCase):
     def setUp(self):
         self.sock_map = {}
         self.conn = MockXfrinConnection(self.sock_map, TEST_ZONE_NAME,
-                                        TEST_RRCLASS, threading.Event(),
+                                        TEST_RRCLASS, None, threading.Event(),
                                         TEST_MASTER_IPV4_ADDRINFO)
+        self.conn.init_socket()
         self.begin_soa = RRset(TEST_ZONE_NAME, TEST_RRCLASS, RRType.SOA(),
                                RRTTL(3600))
         self.begin_soa.add_rdata(Rdata(RRType.SOA(), TEST_RRCLASS,
@@ -585,8 +587,9 @@ class TestXfrinConnection(unittest.TestCase):
             os.remove(TEST_DB_FILE)
         self.sock_map = {}
         self.conn = MockXfrinConnection(self.sock_map, TEST_ZONE_NAME,
-                                        TEST_RRCLASS, threading.Event(),
+                                        TEST_RRCLASS, None, threading.Event(),
                                         TEST_MASTER_IPV4_ADDRINFO)
+        self.conn.init_socket()
         self.soa_response_params = {
             'questions': [example_soa_question],
             'bad_qid': False,
@@ -720,14 +723,16 @@ class TestAXFR(TestXfrinConnection):
         # to confirm an AF_INET6 socket has been created.  A naive application
         # tends to assume it's IPv4 only and hardcode AF_INET.  This test
         # uncovers such a bug.
-        c = MockXfrinConnection({}, TEST_ZONE_NAME, TEST_RRCLASS,
+        c = MockXfrinConnection({}, TEST_ZONE_NAME, TEST_RRCLASS, None,
                                 threading.Event(), TEST_MASTER_IPV6_ADDRINFO)
+        c.init_socket()
         c.bind(('::', 0))
         c.close()
 
     def test_init_chclass(self):
-        c = MockXfrinConnection({}, TEST_ZONE_NAME, RRClass.CH(),
+        c = MockXfrinConnection({}, TEST_ZONE_NAME, RRClass.CH(), None,
                                 threading.Event(), TEST_MASTER_IPV4_ADDRINFO)
+        c.init_socket()
         axfrmsg = c._create_query(RRType.AXFR())
         self.assertEqual(axfrmsg.get_question()[0].get_class(),
                          RRClass.CH())
@@ -1679,6 +1684,110 @@ class TestXfrinRecorder(unittest.TestCase):
         self.recorder.decrement(TEST_ZONE_NAME)
         self.assertEqual(self.recorder.xfrin_in_progress(TEST_ZONE_NAME), False)
 
+class TestXfrinProcess(unittest.TestCase):
+    def setUp(self):
+        self.unlocked = False
+        self.conn_closed = False
+        self.do_raise_on_close = False
+        self.do_raise_on_connect = False
+        self.do_raise_on_publish = False
+        self.master = (socket.AF_INET, socket.SOCK_STREAM,
+                       (TEST_MASTER_IPV4_ADDRESS, TEST_MASTER_PORT))
+
+    def tearDown(self):
+        # whatever happens the lock acquired in xfrin_recorder.increment
+        # must always be released.  We checked the condition for all test
+        # cases.
+        self.assertTrue(self.unlocked)
+
+        # Same for the connection
+        self.assertTrue(self.conn_closed)
+
+    def increment(self, zone_name):
+        '''Fake method of xfrin_recorder.increment.
+
+        '''
+        self.unlocked = False
+
+    def decrement(self, zone_name):
+        '''Fake method of xfrin_recorder.decrement.
+
+        '''
+        self.unlocked = True
+
+    def publish_xfrin_news(self, zone_name, rrclass, ret):
+        '''Fake method of serve.publish_xfrin_news
+
+        '''
+        if self.do_raise_on_publish:
+            raise XfrinTestException('Emulated exception in publish')
+
+    def connect_to_master(self, conn):
+        self.sock_fd = conn.fileno()
+        if self.do_raise_on_connect:
+            raise XfrinTestException('Emulated exception in connect')
+        return True
+
+    def conn_close(self, conn):
+        self.conn_closed = True
+        XfrinConnection.close(conn)
+        if self.do_raise_on_close:
+            raise XfrinTestException('Emulated exception in connect')
+
+    def create_xfrinconn(self, sock_map, zone_name, rrclass, datasrc_client,
+                         shutdown_event, master_addrinfo, tsig_key):
+        conn = MockXfrinConnection(sock_map, zone_name, rrclass,
+                                   datasrc_client, shutdown_event,
+                                   master_addrinfo, tsig_key)
+
+        # An awkward check that would specifically identify an old bug
+        # where initialziation of XfrinConnection._tsig_ctx_creator caused
+        # self reference and subsequently led to reference leak.
+        orig_ref = sys.getrefcount(conn)
+        conn._tsig_ctx_creator = None
+        self.assertEqual(orig_ref, sys.getrefcount(conn))
+
+        # Replace some methods for connect with our internal ones for the
+        # convenience of tests
+        conn.connect_to_master = lambda : self.connect_to_master(conn)
+        conn.do_xfrin = lambda x, y : XFRIN_OK
+        conn.close = lambda : self.conn_close(conn)
+
+        return conn
+
+    def test_process_xfrin_normal(self):
+        # Normal, successful case.  We only check that things are cleaned up
+        # at the tearDown time.
+        process_xfrin(self, self, TEST_ZONE_NAME, TEST_RRCLASS, None, None,
+                      self.master,  False, None, RRType.AXFR(),
+                      self.create_xfrinconn)
+
+    def test_process_xfrin_exception_on_connect(self):
+        # connect_to_master() will raise an exception.  Things must still be
+        # cleaned up.
+        self.do_raise_on_connect = True
+        process_xfrin(self, self, TEST_ZONE_NAME, TEST_RRCLASS, None, None,
+                      self.master,  False, None, RRType.AXFR(),
+                      self.create_xfrinconn)
+
+    def test_process_xfrin_exception_on_close(self):
+        # connect() will result in exception, and even the cleanup close()
+        # will fail with an exception.  This should be quite likely a bug,
+        # but we deal with that case.
+        self.do_raise_on_connect = True
+        self.do_raise_on_close = True
+        process_xfrin(self, self, TEST_ZONE_NAME, TEST_RRCLASS, None, None,
+                      self.master,  False, None, RRType.AXFR(),
+                      self.create_xfrinconn)
+
+    def test_process_xfrin_exception_on_publish(self):
+        # xfr succeeds but notifying the zonemgr fails with exception.
+        # everything must still be cleaned up.
+        self.do_raise_on_publish = True
+        process_xfrin(self, self, TEST_ZONE_NAME, TEST_RRCLASS, None, None,
+                      self.master,  False, None, RRType.AXFR(),
+                      self.create_xfrinconn)
+
 class TestXfrin(unittest.TestCase):
     def setUp(self):
         # redirect output
diff --git a/src/bin/xfrin/xfrin.py.in b/src/bin/xfrin/xfrin.py.in
index 1f5d9a1..c0ce1d5 100755
--- a/src/bin/xfrin/xfrin.py.in
+++ b/src/bin/xfrin/xfrin.py.in
@@ -323,6 +323,7 @@ class XfrinFirstData(XfrinState):
                  conn.zone_str())
             # We are now going to add RRs to the new zone.  We need create
             # a Diff object.  It will be used throughtout the XFR session.
+            # DISABLE FOR DEBUG
             conn._diff = Diff(conn._datasrc_client, conn._zone_name, True)
             self.set_xfrstate(conn, XfrinAXFR())
         return False
@@ -468,21 +469,27 @@ class XfrinConnection(asyncore.dispatcher):
         # Data source handler
         self._datasrc_client = datasrc_client
 
-        self.create_socket(master_addrinfo[0], master_addrinfo[1])
         self._sock_map = sock_map
         self._soa_rr_count = 0
         self._idle_timeout = idle_timeout
-        self.setblocking(1)
         self._shutdown_event = shutdown_event
-        self._master_address = master_addrinfo[2]
+        self._master_addrinfo = master_addrinfo
         self._tsig_key = tsig_key
         self._tsig_ctx = None
         # tsig_ctx_creator is introduced to allow tests to use a mock class for
         # easier tests (in normal case we always use the default)
-        self._tsig_ctx_creator = self.__create_tsig_ctx
+        self._tsig_ctx_creator = lambda key : TSIGContext(key)
 
-    def __create_tsig_ctx(self, key):
-        return TSIGContext(key)
+    def init_socket(self):
+        '''Initialize the underlyig socket.
+
+        This is essentially a part of __init__() and is expected to be
+        called immediately after the constructor.  It's separated from
+        the constructor because otherwise we might not be able to close
+        it if the constructor raises an exception after opening the socket.
+        '''
+        self.create_socket(self._master_addrinfo[0], self._master_addrinfo[1])
+        self.setblocking(1)
 
     def __set_xfrstate(self, new_state):
         self.__state = new_state
@@ -498,10 +505,11 @@ class XfrinConnection(asyncore.dispatcher):
         '''Connect to master in TCP.'''
 
         try:
-            self.connect(self._master_address)
+            self.connect(self._master_addrinfo[2])
             return True
         except socket.error as e:
-            logger.error(XFRIN_CONNECT_MASTER, self._master_address, str(e))
+            logger.error(XFRIN_CONNECT_MASTER, self._master_addrinfo[2],
+                         str(e))
             return False
 
     def _get_zone_soa(self):
@@ -697,7 +705,6 @@ class XfrinConnection(asyncore.dispatcher):
             # (if not yet - possible in case of xfr-level exception) as soon
             # as possible
             self._diff = None
-            self.close()
 
         return ret
 
@@ -730,33 +737,6 @@ class XfrinConnection(asyncore.dispatcher):
         if msg.get_rr_count(Message.SECTION_QUESTION) > 1:
             raise XfrinException('query section count greater than 1')
 
-    def _handle_answer_section(self, answer_section):
-        '''Return a generator for the reponse in one tcp package to a zone transfer.'''
-
-        for rrset in answer_section:
-            rrset_name = rrset.get_name().to_text()
-            rrset_ttl = int(rrset.get_ttl().to_text())
-            rrset_class = rrset.get_class().to_text()
-            rrset_type = rrset.get_type().to_text()
-
-            for rdata in rrset.get_rdata():
-                # Count the soa record count
-                if rrset.get_type() == RRType.SOA():
-                    self._soa_rr_count += 1
-
-                    # XXX: the current DNS message parser can't preserve the
-                    # RR order or separete the beginning and ending SOA RRs.
-                    # As a short term workaround, we simply ignore the second
-                    # SOA, and ignore the erroneous case where the transfer
-                    # session doesn't end with an SOA.
-                    if (self._soa_rr_count == 2):
-                        # Avoid inserting soa record twice
-                        break
-
-                rdata_text = rdata.to_text()
-                yield (rrset_name, rrset_ttl, rrset_class, rrset_type,
-                       rdata_text)
-
     def _handle_xfrin_responses(self):
         read_next_msg = True
         while read_next_msg:
@@ -794,47 +774,82 @@ class XfrinConnection(asyncore.dispatcher):
 
         return False
 
-    def log_info(self, msg, type='info'):
-        # Overwrite the log function, log nothing
-        pass
-
-def process_xfrin(server, xfrin_recorder, zone_name, rrclass, db_file,
-                  shutdown_event, master_addrinfo, check_soa, tsig_key,
-                  request_type):
-    xfrin_recorder.increment(zone_name)
-
-    # Create a data source client used in this XFR session.  Right now we
-    # still assume an sqlite3-based data source, and use both the old and new
-    # data source APIs.  We also need to use a mock client for tests.
-    # For a temporary workaround to deal with these situations, we skip the
-    # creation when the given file is none (the test case).  Eventually
-    # this code will be much cleaner.
-    datasrc_client = None
-    if db_file is not None:
-        # temporary hardcoded sqlite initialization. Once we decide on
-        # the config specification, we need to update this (TODO)
-        # this may depend on #1207, or any followup ticket created for #1207
-        datasrc_type = "sqlite3"
-        datasrc_config = "{ \"database_file\": \"" + db_file + "\"}"
-        datasrc_client = DataSourceClient(datasrc_type, datasrc_config)
-
-    # Create a TCP connection for the XFR session and perform the operation.
-    sock_map = {}
-    conn = XfrinConnection(sock_map, zone_name, rrclass, datasrc_client,
-                           shutdown_event, master_addrinfo, tsig_key)
-    # XXX: We still need _db_file for temporary workaround in _create_query().
-    # This should be removed when we eliminate the need for the workaround.
-    conn._db_file = db_file
+def __process_xfrin(server, zone_name, rrclass, db_file,
+                    shutdown_event, master_addrinfo, check_soa, tsig_key,
+                    request_type, conn_class=XfrinConnection):
+    conn = None
+    exception = None
     ret = XFRIN_FAIL
-    if conn.connect_to_master():
-        ret = conn.do_xfrin(check_soa, request_type)
+    try:
+        # Create a data source client used in this XFR session.  Right now we
+        # still assume an sqlite3-based data source, and use both the old and
+        # new data source APIs.  We also need to use a mock client for tests.
+        # For a temporary workaround to deal with these situations, we skip the
+        # creation when the given file is none (the test case).  Eventually
+        # this code will be much cleaner.
+        datasrc_client = None
+        if db_file is not None:
+            # temporary hardcoded sqlite initialization. Once we decide on
+            # the config specification, we need to update this (TODO)
+            # this may depend on #1207, or any followup ticket created for #1207
+            datasrc_type = "sqlite3"
+            datasrc_config = "{ \"database_file\": \"" + db_file + "\"}"
+            datasrc_client = DataSourceClient(datasrc_type, datasrc_config)
+
+        # Create a TCP connection for the XFR session and perform the operation
+        sock_map = {}
+        conn = conn_class(sock_map, zone_name, rrclass, datasrc_client,
+                          shutdown_event, master_addrinfo, tsig_key)
+        conn.init_socket()
+        # XXX: We still need _db_file for temporary workaround in _create_query().
+        # This should be removed when we eliminate the need for the workaround.
+        conn._db_file = db_file
+        if conn.connect_to_master():
+            ret = conn.do_xfrin(check_soa, request_type)
+    except Exception as ex:
+        # If exception happens, just remember it here so that we can re-raise
+        # after cleaning up things.  We don't log it here because we want
+        # eliminate smallest possibility of having an exception in logging
+        # itself.
+        exception = ex
+
+    # asyncore.dispatcher requires explicit close() unless its lifetime
+    # from born to destruction is closed within asyncore.loop, which is not
+    # the case for us.  We always close() here, whether or not do_xfrin
+    # succeeds, and even when we see an unexpected exception.
+    if conn is not None:
+        conn.close()
 
     # Publish the zone transfer result news, so zonemgr can reset the
     # zone timer, and xfrout can notify the zone's slaves if the result
     # is success.
     server.publish_xfrin_news(zone_name, rrclass, ret)
+
+    if exception is not None:
+        raise exception
+
+def process_xfrin(server, xfrin_recorder, zone_name, rrclass, db_file,
+                  shutdown_event, master_addrinfo, check_soa, tsig_key,
+                  request_type, conn_class=XfrinConnection):
+    # Even if it should be rare, the main process of xfrin session can
+    # raise an exception.  In order to make sure the lock in xfrin_recorder
+    # is released in any cases, we delegate the main part to the helper
+    # function in the try block, catch any exceptions, then release the lock.
+    xfrin_recorder.increment(zone_name)
+    exception = None
+    try:
+        __process_xfrin(server, zone_name, rrclass, db_file,
+                        shutdown_event, master_addrinfo, check_soa, tsig_key,
+                        request_type, conn_class)
+    except Exception as ex:
+        # don't log it until we complete derement().
+        exception = ex
     xfrin_recorder.decrement(zone_name)
 
+    if exception is not None:
+        typestr = "AXFR" if request_type == RRType.AXFR() else "IXFR"
+        logger.error(XFRIN_XFR_PROCESS_FAILURE, typestr, zone_name.to_text(),
+                     str(rrclass), str(exception))
 
 class XfrinRecorder:
     def __init__(self):
diff --git a/src/bin/xfrin/xfrin_messages.mes b/src/bin/xfrin/xfrin_messages.mes
index e5d1733..81bd649 100644
--- a/src/bin/xfrin/xfrin_messages.mes
+++ b/src/bin/xfrin/xfrin_messages.mes
@@ -29,6 +29,21 @@ this can only happen for AXFR.
 The XFR transfer for the given zone has failed due to a protocol error.
 The error is shown in the log message.
 
+% XFRIN_XFR_PROCESS_FAILURE %1 transfer of zone %2/%3 failed: %4
+An XFR session failed outside the main protocol handling.  This
+includes an error at the data source level at the initialization
+phase, unexpected failure in the network connection setup to the
+master server, or even more unexpected failure due to unlikely events
+such as memory allocation failure.  Details of the error are shown in
+the log message.  In general, these errors are not really expected
+ones, and indicate an installation error or a program bug.  The
+session handler thread tries to clean up all intermediate resources
+even on these errors, but it may be incomplete.  So, if this log
+message continuously appears, system resource consumption should be
+checked, and you may even want to disable the corresponding transfers.
+You may also want to file a bug report if this message appears so
+often.
+
 % XFRIN_XFR_TRANSFER_STARTED %1 transfer of zone %2 started
 A connection to the master server has been made, the serial value in
 the SOA record has been checked, and a zone transfer has been started.
diff --git a/src/lib/python/isc/datasrc/tests/datasrc_test.py b/src/lib/python/isc/datasrc/tests/datasrc_test.py
index 15fa347..dcb8904 100644
--- a/src/lib/python/isc/datasrc/tests/datasrc_test.py
+++ b/src/lib/python/isc/datasrc/tests/datasrc_test.py
@@ -20,6 +20,7 @@ import isc.dns
 import unittest
 import os
 import shutil
+import sys
 import json
 
 TESTDATA_PATH = os.environ['TESTDATA_PATH'] + os.sep
@@ -494,6 +495,23 @@ class DataSrcUpdater(unittest.TestCase):
                          dsc.get_updater(isc.dns.Name("notexistent.example"),
                                          True))
 
+    def test_client_reference(self):
+        # Temporarily create various objects using factory methods of the
+        # client.  The created objects won't be stored anywhere and
+        # immediately released.  The creation shouldn't affect the reference
+        # to the base client.
+        dsc = isc.datasrc.DataSourceClient("sqlite3", WRITE_ZONE_DB_CONFIG)
+        orig_ref = sys.getrefcount(dsc)
+
+        dsc.find_zone(isc.dns.Name("example.com"))
+        self.assertEqual(orig_ref, sys.getrefcount(dsc))
+
+        dsc.get_iterator(isc.dns.Name("example.com."))
+        self.assertEqual(orig_ref, sys.getrefcount(dsc))
+
+        dsc.get_updater(isc.dns.Name("example.com"), True)
+        self.assertEqual(orig_ref, sys.getrefcount(dsc))
+
 if __name__ == "__main__":
     isc.log.init("bind10")
     unittest.main()
diff --git a/src/lib/python/isc/datasrc/updater_python.cc b/src/lib/python/isc/datasrc/updater_python.cc
index e447622..29d2ffe 100644
--- a/src/lib/python/isc/datasrc/updater_python.cc
+++ b/src/lib/python/isc/datasrc/updater_python.cc
@@ -270,15 +270,16 @@ PyObject*
 createZoneUpdaterObject(isc::datasrc::ZoneUpdaterPtr source,
                         PyObject* base_obj)
 {
-    s_ZoneUpdater* py_zi = static_cast<s_ZoneUpdater*>(
+    s_ZoneUpdater* py_zu = static_cast<s_ZoneUpdater*>(
         zoneupdater_type.tp_alloc(&zoneupdater_type, 0));
-    if (py_zi != NULL) {
-        py_zi->cppobj = source;
-    }
-    if (base_obj != NULL) {
-        Py_INCREF(base_obj);
+    if (py_zu != NULL) {
+        py_zu->cppobj = source;
+        py_zu->base_obj = base_obj;
+        if (base_obj != NULL) {
+            Py_INCREF(base_obj);
+        }
     }
-    return (py_zi);
+    return (py_zu);
 }
 
 } // namespace python




More information about the bind10-changes mailing list