BIND 10 exp/res-research, updated. 8815570e3dad3076b298c4226bdebd49d7031f34 [res-research] prevent NS fetch loop
BIND 10 source code commits
bind10-changes at lists.isc.org
Fri Jul 13 19:31:56 UTC 2012
The branch, exp/res-research has been updated
via 8815570e3dad3076b298c4226bdebd49d7031f34 (commit)
via 9e42f924adaacedda4f2d2c840e4432491adc508 (commit)
via 8bac00e308d31d69198bcf4caa5e30b27d39168e (commit)
via db0e12e6ae60de560acfe7fcd75563729c27cddb (commit)
via cfaa1e9996a45b4f4b3b4474280b2a7ae11c0a7c (commit)
via 403189c3d019a3e7869943c9b70ba17e9acdffc3 (commit)
via 29449c160dc12696e55462564bbcccd31c1cb40e (commit)
via bee7544d8cf27c59a0e45ac4a3806d03d1ffa536 (commit)
via 9bda06bea85fdccd0eb3b5c95e597aceb9d627f7 (commit)
from ae34359677345d4fd2dc49111c3d5d8d69d9d3a3 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 8815570e3dad3076b298c4226bdebd49d7031f34
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Fri Jul 13 12:31:15 2012 -0700
[res-research] prevent NS fetch loop
commit 9e42f924adaacedda4f2d2c840e4432491adc508
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Fri Jul 13 12:30:21 2012 -0700
[res-research] record msglen of neg answers; fetch answer NS addrs more often.
commit 8bac00e308d31d69198bcf4caa5e30b27d39168e
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Fri Jul 13 10:14:36 2012 -0700
[res-research] supported tracing type ANY queries
commit db0e12e6ae60de560acfe7fcd75563729c27cddb
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Fri Jul 13 00:32:17 2012 -0700
[res-research] counted # of external messages with the resp sizes on cache miss
commit cfaa1e9996a45b4f4b3b4474280b2a7ae11c0a7c
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Fri Jul 13 00:10:06 2012 -0700
[res-research] supported NS fetch emulation
commit 403189c3d019a3e7869943c9b70ba17e9acdffc3
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Thu Jul 12 23:26:44 2012 -0700
[res-research] cleanup
commit 29449c160dc12696e55462564bbcccd31c1cb40e
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Thu Jul 12 23:18:00 2012 -0700
[res-research] replay CNAME update
commit bee7544d8cf27c59a0e45ac4a3806d03d1ffa536
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Thu Jul 12 22:28:14 2012 -0700
[res-research] query replay, basic cases
commit 9bda06bea85fdccd0eb3b5c95e597aceb9d627f7
Author: JINMEI Tatuya <jinmei at isc.org>
Date: Thu Jul 12 14:08:56 2012 -0700
[res-research] printed some additional statistics
-----------------------------------------------------------------------
Summary of changes:
exp/res-research/analysis/dns_cache.py | 39 ++-
exp/res-research/analysis/mini_resolver.py | 23 +-
exp/res-research/analysis/query_replay.py | 409 ++++++++++++++++++++++++++--
3 files changed, 427 insertions(+), 44 deletions(-)
-----------------------------------------------------------------------
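The headline commit ("prevent NS fetch loop") caps recursive NS-address fetches with a nesting counter: each nested fetch passes nest + 1, and resolution gives up once FETCH_DEPTH_MAX is exceeded (see ResolverContext.__fetch_ns_addrs in the diff below). A minimal standalone sketch of the idea, using a hypothetical toy dependency map instead of real DNS data:

FETCH_DEPTH_MAX = 8  # same constant as in query_replay.py

# Toy dependency map: resolving a name first requires resolving its value.
# 'a' and 'b' depend on each other, which would recurse forever unguarded.
DEPS = {'a': 'b', 'b': 'a', 'c': None}

def resolve(name, nest=0):
    if nest > FETCH_DEPTH_MAX:
        return False              # depth limit hit: give up instead of looping
    dep = DEPS.get(name)
    if dep is None:
        return True               # nothing more to fetch
    return resolve(dep, nest + 1)

assert resolve('c') is True
assert resolve('a') is False      # the mutual dependency is cut off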
diff --git a/exp/res-research/analysis/dns_cache.py b/exp/res-research/analysis/dns_cache.py
index 32d3488..93278d4 100755
--- a/exp/res-research/analysis/dns_cache.py
+++ b/exp/res-research/analysis/dns_cache.py
@@ -85,6 +85,16 @@ class CacheEntry:
self.rcode = other.rcode
self.id = other.id
+ def is_expired(self, now):
+ # This is cheating, but for now we assume all "local" entries have
+ # a permanent TTL
+ if self.trust == SimpleDNSCache.TRUST_LOCAL:
+ return False
+
+ if self.time_updated is None or self.time_updated + self.ttl < now:
+ return True
+ return False
+
# Don't worry about cache expire; just record the RRs
class SimpleDNSCache:
'''A simplified DNS cache database.
@@ -118,18 +128,28 @@ class SimpleDNSCache:
self.__counter = 0 # unique ID for CacheEntry's
self.__entries = {} # ID => CacheEntry
- def find(self, name, rrclass, rrtype, options=FIND_DEFAULT):
+ def get(self, entry_id):
+ return self.__entries[entry_id]
+
+ def find(self, name, rrclass, rrtype, options=FIND_DEFAULT, trust=None):
+ '''Find a specified (name, class, type) from the cache.
+
+ options are a combination of various search options.
+ If trust is specified, only the specified trust level is considered,
+ and in that case the FIND_ALLOW_NOANSWER option shouldn't be set.
+
+ '''
key = (name, rrclass)
rdata_map = self.__table.get((name, rrclass))
rcode, rrset, trust, id = self.__find_type(name, rrclass, rrtype,
- rdata_map, options)
+ rdata_map, options, trust)
if ((options & self.FIND_ALLOW_CNAME) != 0 and
rrtype != RRType.CNAME()):
# If CNAME is allowed, see if there is one. If there is and it
# has a higher trust level, we use it.
rcode_cname, rrset_cname, trust_cname, id_cname = \
self.__find_type(name, rrclass, RRType.CNAME(), rdata_map,
- options)
+ options, trust)
if (rrset_cname is not None and
(trust is None or trust_cname < trust)):
return rcode_cname, rrset_cname, id_cname
@@ -147,18 +167,20 @@ class SimpleDNSCache:
else:
return rcode, None
- def __find_type(self, name, rrclass, type, rdata_map, options):
+ def __find_type(self, name, rrclass, rrtype, rdata_map, options, trust):
'''A subroutine of find, finding an RRset of a given type.'''
- if rdata_map is not None and type in rdata_map:
- entries = rdata_map[type]
+ if rdata_map is not None and rrtype in rdata_map:
+ entries = rdata_map[rrtype]
entry = entries[0]
if (options & self.FIND_ALLOW_NOANSWER) == 0:
entry = self.__find_cache_entry(entries, self.TRUST_ANSWER)
+ elif trust is not None:
+ entry = self.__find_cache_entry(entries, trust, True)
if entry is None:
return None, None, None, None
rcode = Rcode(entry.rcode)
(ttl, rdata_list) = (entry.ttl, entry.rdata_list)
- rrset = RRset(name, rrclass, type, RRTTL(ttl))
+ rrset = RRset(name, rrclass, rrtype, RRTTL(ttl))
for rdata in rdata_list:
rrset.add_rdata(rdata)
if rrset.get_rdata_count() == 0 and \
@@ -210,7 +232,7 @@ class SimpleDNSCache:
'''
entry = self.__entries[entry_id]
- if entry.time_updated is None or entry.time_updated + entry.ttl < now:
+ if entry.is_expired(now):
entry.time_updated = now
return True
return False
@@ -229,6 +251,7 @@ class SimpleDNSCache:
rcode)
if not key in self.__table:
self.__table[key] = {rrset.get_type(): [new_entry]}
+ new_entry._table_entry = self.__table[key]
else:
table_ent = self.__table[key]
cur_entries = table_ent.get(rrset.get_type())
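For reference, the expiry rule added to CacheEntry above can be exercised on its own. The sketch below mirrors is_expired() with simplified names; the TRUST_LOCAL value is an assumed placeholder (the real constant is defined on SimpleDNSCache):

TRUST_LOCAL = 0  # assumed placeholder; see SimpleDNSCache for the real value

class Entry:
    def __init__(self, ttl, time_updated, trust):
        self.ttl = ttl
        self.time_updated = time_updated  # None: never stamped as updated
        self.trust = trust

    def is_expired(self, now):
        # "local" entries (e.g. root hints) are treated as permanent
        if self.trust == TRUST_LOCAL:
            return False
        # never stamped, or TTL has run out => expired
        return self.time_updated is None or self.time_updated + self.ttl < now

e = Entry(ttl=300, time_updated=1000, trust=1)
assert not e.is_expired(1200)  # 1000 + 300 >= 1200: still active
assert e.is_expired(1400)      # 1000 + 300 < 1400: expired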
diff --git a/exp/res-research/analysis/mini_resolver.py b/exp/res-research/analysis/mini_resolver.py
index ee183ff..6212f36 100755
--- a/exp/res-research/analysis/mini_resolver.py
+++ b/exp/res-research/analysis/mini_resolver.py
@@ -356,8 +356,7 @@ class ResolverContext:
self.dprint(LOGLVL_INFO, 'bogus SOA name for negative: %s',
[auth_rrset.get_name()])
continue
- self.__cache.add(auth_rrset, SimpleDNSCache.TRUST_ANSWER,
- msglen)
+ self.__cache.add(auth_rrset, SimpleDNSCache.TRUST_ANSWER, 0)
neg_ttl = get_soa_ttl(auth_rrset.get_rdata()[0])
self.dprint(LOGLVL_DEBUG10,
'got a negative response, code=%s, negTTL=%s',
@@ -370,7 +369,7 @@ class ResolverContext:
neg_ttl = self.DEFAULT_NEGATIVE_TTL
neg_rrset = RRset(self.__qname, self.__qclass, self.__qtype,
RRTTL(neg_ttl))
- self.__cache.add(neg_rrset, SimpleDNSCache.TRUST_ANSWER, 0, rcode)
+ self.__cache.add(neg_rrset, SimpleDNSCache.TRUST_ANSWER, msglen, rcode)
def __handle_referral(self, resp_msg, ns_rrset, msglen):
self.dprint(LOGLVL_DEBUG10, 'got a referral: %s', [ns_rrset])
@@ -389,8 +388,10 @@ class ResolverContext:
self.dprint(LOGLVL_DEBUG10, 'got glue for referral: %s',
[ad_rrset])
self.__cache.add(ad_rrset, SimpleDNSCache.TRUST_GLUE)
+ prev_zone = self.__cur_zone
self.__cur_zone = ns_rrset.get_name()
- (self.__ns_addr4, self.__ns_addr6) = self.__find_ns_addrs(ns_rrset)
+ (self.__ns_addr4, self.__ns_addr6) = \
+ self.__find_ns_addrs(ns_rrset, True, prev_zone)
return self.__try_next_server()
def __try_next_server(self):
@@ -445,7 +446,8 @@ class ResolverContext:
return zname, ns_rrset
raise MiniResolverException('no name server found for ' + str(qname))
- def __find_ns_addrs(self, nameservers, fetch_if_notfound=True):
+ def __find_ns_addrs(self, nameservers, fetch_if_notfound,
+ delegating_zone=None):
v4_addrs = []
v6_addrs = []
rcode4 = None
@@ -455,17 +457,24 @@ class ResolverContext:
for ns in nameservers.get_rdata():
ns_name = Name(ns.to_text())
ns_names.append(ns_name)
+ find_option = SimpleDNSCache.FIND_ALLOW_NOANSWER
+ if delegating_zone is not None and fetch_if_notfound:
+ cmp_reln = delegating_zone.compare(ns_name).get_relation()
+ if (cmp_reln != NameComparisonResult.EQUAL and
+ cmp_reln != NameComparisonResult.SUPERDOMAIN):
+ # out-of-zone glue. Fetch the real answer if it doesn't exist.
+ find_option = SimpleDNSCache.FIND_DEFAULT
if self.__sock4:
rcode4, rrset4, _ = \
self.__cache.find(ns_name, ns_class, RRType.A(),
- SimpleDNSCache.FIND_ALLOW_NOANSWER)
+ find_option)
if rrset4 is not None:
for rdata in rrset4.get_rdata():
v4_addrs.append((rdata.to_text(), DNS_PORT))
if self.__sock6:
rcode6, rrset6, _ = \
self.__cache.find(ns_name, ns_class, RRType.AAAA(),
- SimpleDNSCache.FIND_ALLOW_NOANSWER)
+ find_option)
if rrset6 is not None:
for rdata in rrset6.get_rdata():
# specify 0 for flowinfo and scopeid unconditionally
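The out-of-zone glue test added to __find_ns_addrs above boils down to: is the NS name at or below the delegating zone? If not, the glue may be stale, so a real answer should be fetched. A rough illustration using plain label tuples in place of isc.dns Name/NameComparisonResult:

def labels(name):
    # 'ns1.example.org' -> ('org', 'example', 'ns1')
    return tuple(reversed(name.rstrip('.').lower().split('.')))

def is_in_zone(zone, ns_name):
    z, n = labels(zone), labels(ns_name)
    # zone EQUAL or SUPERDOMAIN of ns_name <=> zone labels are a prefix
    return n[:len(z)] == z

assert is_in_zone('example.org', 'ns1.example.org')     # in-zone: glue is fine
assert not is_in_zone('example.org', 'ns.example.com')  # out-of-zone: fetch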
diff --git a/exp/res-research/analysis/query_replay.py b/exp/res-research/analysis/query_replay.py
index c821689..47de267 100755
--- a/exp/res-research/analysis/query_replay.py
+++ b/exp/res-research/analysis/query_replay.py
@@ -18,6 +18,8 @@
from isc.dns import *
import parse_qrylog
import dns_cache
+
+import datetime
from optparse import OptionParser
import re
import sys
@@ -25,6 +27,12 @@ import sys
convert_rrtype = parse_qrylog.convert_rrtype
RE_LOGLINE = parse_qrylog.RE_LOGLINE
+LOGLVL_INFO = 0
+LOGLVL_DEBUG1 = 1
+LOGLVL_DEBUG3 = 3 # rare event, but can happen in the real world
+LOGLVL_DEBUG5 = 5 # some unexpected event, but sometimes happens
+LOGLVL_DEBUG10 = 10 # detailed trace
+
class QueryReplaceError(Exception):
pass
@@ -35,8 +43,9 @@ class QueryTrace:
resp_size(int): estimated size of the response to the query.
'''
- def __init__(self, ttl, cache_entries):
+ def __init__(self, ttl, cache_entries, rcode):
self.ttl = ttl
+ self.rcode = rcode
self.__cache_entries = cache_entries
self.cname_trace = []
self.__cache_log = []
@@ -70,18 +79,16 @@ class QueryTrace:
In that case we consider it valid as long as one of them is valid.
'''
- expired = False
- for trace in [self] + self.cname_trace:
- if trace.__cache_expired(cache, now):
- expired = True
- return expired
+ expired = 0
+ for cache_entry_id in self.__cache_entries:
+ entry = cache.get(cache_entry_id)
+ if entry.is_expired(now):
+ expired += 1
+ return len(self.__cache_entries) == expired
- def __cache_expired(self, cache, now):
- updated = 0
+ def update(self, cache, now):
for cache_entry_id in self.__cache_entries:
- if cache.update(cache_entry_id, now):
- updated += 1
- return len(self.__cache_entries) == updated
+ cache.update(cache_entry_id, now)
class CacheLog:
'''consists of...
@@ -98,11 +105,258 @@ class CacheLog:
self.hits = 0
self.misses = 1 if on_miss else 0
+class ResolverContext:
+ '''Emulated resolver context.'''
+ FETCH_DEPTH_MAX = 8 # prevent infinite NS fetch
+
+ def __init__(self, qname, qclass, qtype, cache, now, dbg_level, nest=0):
+ self.__qname = qname
+ self.__qclass = qclass
+ self.__qtype = qtype
+ self.__cache = cache
+ self.__now = now
+ self.__dbg_level = dbg_level
+ self.__cur_zone = qname # sentinel
+ self.__nest = nest
+
+ def dprint(self, level, msg, params=[]):
+ '''Dump a debug/log message.'''
+ if self.__dbg_level < level:
+ return
+ date_time = str(datetime.datetime.today())
+ postfix = '[%s/%s/%s at %s]' % (self.__qname.to_text(True),
+ str(self.__qclass), str(self.__qtype),
+ self.__cur_zone)
+ sys.stdout.write(('%s ' + msg + ' %s\n') %
+ tuple([date_time] + [str(p) for p in params] +
+ [postfix]))
+
+ def resolve(self):
+ # The cache actually has the resolution result with full delegation
+ # chain; it's just that some part of it has expired. We first need to
+ # identify the part to be resolved.
+ chain, resp_list = self.__get_resolve_chain()
+ if len(chain) == 1:
+ return chain[0][1], False, resp_list
+
+ self.dprint(LOGLVL_DEBUG5, 'cache miss, emulate resolving')
+
+ while True:
+ nameservers = chain[-1][1] # deepest active NS RRset
+ self.__cur_zone = nameservers.get_name()
+ self.dprint(LOGLVL_DEBUG10, 'reach a zone cut')
+
+ have_addr, fetch_list = self.__find_ns_addrs(nameservers)
+ if not have_addr:
+ # If fetching NS addresses fails, we should be at the end of
+ # the chain
+ found, fetch_resps = self.__fetch_ns_addrs(fetch_list)
+ resp_list.extend(fetch_resps)
+ if not found:
+ chain = chain[:-1]
+ if len(chain) != 1:
+ raise QueryReplaceError('assumption failure')
+ break
+
+ # "send query, then get response". Now we can consider the
+ # delegation records (NS and glue AAAA/A) one level deeper active
+ # again.
+ self.dprint(LOGLVL_DEBUG10, 'external query')
+ chain = chain[:-1]
+ if len(chain) == 1: # reached deepest zone, update the final cache
+ break
+
+ # Otherwise, go down to the zone one level lower.
+ new_id, nameservers = chain[-1]
+ self.dprint(LOGLVL_DEBUG10, 'update NS at zone %s, trust %s',
+ [nameservers.get_name(),
+ self.__cache.get(new_id).trust])
+ self.__cache.update(new_id, self.__now)
+ self.__update_glue(nameservers)
+
+ # We've reached the deepest zone (which should normally contain the
+ # query name)
+ self.dprint(LOGLVL_DEBUG5, 'resolution completed')
+ self.__cache.update(chain[0][0], self.__now)
+ return chain[0][1], True, resp_list
+
+ def __fetch_ns_addrs(self, fetch_list):
+ if self.__nest > self.FETCH_DEPTH_MAX:
+ self.dprint(LOGLVL_INFO, 'reached fetch depth limit, aborted')
+ return False, []
+
+ self.dprint(LOGLVL_DEBUG10, 'no NS addresses are known, fetch them.')
+ ret_resp_list = []
+ skipped = 0
+ for fetch in fetch_list:
+ ns_name, addr_type = fetch[1], fetch[0]
+
+ # First, check if we know this RR at all in the first place.
+ # It could happen that in the original resolution the resolver
+ # already knew some of the missing addresses as an answer (as a
+ # result or side effect of prior resolution) and didn't bother to
+ # try to fetch others. In that case, our cache doesn't have any
+ # information for this record (which would crash resolution
+ # emulation below).
+ if self.__cache.find(ns_name, self.__qclass, addr_type)[0] is None:
+ skipped += 1
+ self.dprint(LOGLVL_DEBUG10, 'skip fetching NS addrs %s/%s/%s',
+ [ns_name, self.__qclass, addr_type])
+ continue
+
+ res_ctx = ResolverContext(ns_name, self.__qclass, addr_type,
+ self.__cache, self.__now,
+ self.__dbg_level, self.__nest + 1)
+ rrset, updated, resp_list = res_ctx.resolve()
+ ret_resp_list.extend(resp_list)
+ if not updated:
+ raise QueryReplaceError('assumption failure')
+ if rrset.get_rdata_count() > 0: # positive result
+ self.dprint(LOGLVL_DEBUG10,
+ 'fetching an NS address succeeded for %s/%s/%s',
+ [ns_name, self.__qclass, addr_type])
+ return True, ret_resp_list
+ self.dprint(LOGLVL_DEBUG10,
+ 'fetching an NS address failed for %s/%s/%s',
+ [ns_name, self.__qclass, addr_type])
+
+ # We should be able to try fetching at least one of the requested
+ # addrs. If not, it means an internal inconsistency.
+ if skipped == len(fetch_list):
+ raise QueryReplaceError('assumption failure in NS fetch for ' +
+ '%s/%s' % (ns_name, addr_type))
+
+ # All attempts fail
+ self.dprint(LOGLVL_DEBUG10, 'fetching an NS address failed')
+ return False, ret_resp_list
+
+ def __update_glue(self, ns_rrset):
+ '''Update cached glue records for in-zone glue of the given NS RRset.'''
+ for ns_name in [Name(ns.to_text()) for ns in ns_rrset.get_rdata()]:
+ # Exclude out-of-zone glue
+ cmp_reln = self.__cur_zone.compare(ns_name).get_relation()
+ if (cmp_reln != NameComparisonResult.SUPERDOMAIN and
+ cmp_reln != NameComparisonResult.EQUAL):
+ self.dprint(LOGLVL_DEBUG10,
+ 'Exclude out-of-zone glue: %s', [ns_name])
+ continue
+ _, rrset, id = self.__find_delegate_info(ns_name, RRType.A())
+ if id is not None:
+ self.dprint(LOGLVL_DEBUG10, 'Update IPv4 glue: %s', [ns_name])
+ self.__cache.update(id, self.__now)
+ _, rrset, id = self.__find_delegate_info(ns_name, RRType.AAAA())
+ if id is not None:
+ self.dprint(LOGLVL_DEBUG10, 'Update IPv6 glue: %s', [ns_name])
+ self.__cache.update(id, self.__now)
+
+ def __get_resolve_chain(self):
+ chain = []
+ resp_list = []
+ rcode, answer_rrset, id = \
+ self.__cache.find(self.__qname, self.__qclass, self.__qtype,
+ dns_cache.SimpleDNSCache.FIND_ALLOW_CNAME |
+ dns_cache.SimpleDNSCache.FIND_ALLOW_NEGATIVE)
+ entry = self.__cache.get(id)
+ chain.append((id, answer_rrset))
+ resp_list.append(entry.msglen)
+ if not entry.is_expired(self.__now):
+ return chain, []
+
+ for l in range(0, self.__qname.get_labelcount()):
+ zname = self.__qname.split(l)
+ _, ns_rrset, id = self.__find_delegate_info(zname, RRType.NS())
+ if ns_rrset is None:
+ continue
+ entry = self.__cache.get(id)
+ self.dprint(LOGLVL_DEBUG10, 'build resolve chain at %s, trust %s',
+ [zname, entry.trust])
+ chain.append((id, ns_rrset))
+ if not entry.is_expired(self.__now):
+ return chain, resp_list
+ resp_list.append(entry.msglen)
+
+ # In our setup the root server should always be available.
+ raise QueryReplaceError('no name server found for ' + str(self.__qname))
+
+ def __find_ns_addrs(self, nameservers):
+ # We only need to know whether we have at least one usable address:
+ have_address = False
+
+ # Record any missing address to be fetched.
+ fetch_list = []
+ for ns_name in [Name(ns.to_text()) for ns in nameservers.get_rdata()]:
+ rcode, rrset4, id = self.__find_delegate_info(ns_name, RRType.A(),
+ True)
+ if rrset4 is not None and rrset4.get_rdata_count() > 0:
+ self.dprint(LOGLVL_DEBUG10, 'found %s IPv4 address for NS %s',
+ [rrset4.get_rdata_count(), ns_name])
+ have_address = True
+ elif rcode is None:
+ fetch_list.append((RRType.A(), ns_name))
+
+ rcode, rrset6, id = \
+ self.__find_delegate_info(ns_name, RRType.AAAA(), True)
+ if rrset6 is not None and rrset6.get_rdata_count() > 0:
+ self.dprint(LOGLVL_DEBUG10, 'found %s IPv6 address for NS %s',
+ [rrset6.get_rdata_count(), ns_name])
+ have_address = True
+ elif rcode is None:
+ fetch_list.append((RRType.AAAA(), ns_name))
+
+ return have_address, fetch_list
+
+ def __find_delegate_info(self, name, rrtype, active_only=False):
+ '''Find an RRset from the cache that can be used for delegation.
+
+ This is for NS and glue AAAA/A records. If active_only is True,
+ expired RRsets won't be returned.
+
+ '''
+ # We first try an "answer" that has a higher trust level and see
+ # if it's cached and still alive.
+ # If it's cached, either positive or negative, rcode should be
+ # non-None, and in that case we use that result.
+ options = 0
+ ans_rcode, ans_rrset, ans_id = \
+ self.__cache.find(name, self.__qclass, rrtype,
+ dns_cache.SimpleDNSCache.FIND_ALLOW_NEGATIVE)
+ if (ans_rcode is not None and
+ not self.__cache.get(ans_id).is_expired(self.__now)):
+ return ans_rcode, ans_rrset, ans_id
+
+ # Next, if the requested type is NS, we consider a cached RRset at
+ # the "auth authority" trust level. In this case checking rcode is
+ # meaningless, so we check rrset (the result should be the same).
+ # We use it only if it's active.
+ if rrtype == RRType.NS():
+ rcode, rrset, id = \
+ self.__cache.find(name, self.__qclass, rrtype,
+ dns_cache.SimpleDNSCache.FIND_ALLOW_NOANSWER,
+ dns_cache.SimpleDNSCache.TRUST_AUTHAUTHORITY)
+ if (rrset is not None and
+ not self.__cache.get(id).is_expired(self.__now)):
+ return rcode, rrset, id
+
+ # Finally, try delegation or glue records. Checking rcode is
+ # meaningless as in the previous case.
+ # Unlike other cases, we use it whether it's active or expired unless
+ # explicitly requested to exclude expired ones.
+ rcode, rrset, id = \
+ self.__cache.find(name, self.__qclass, rrtype,
+ dns_cache.SimpleDNSCache.FIND_ALLOW_NOANSWER,
+ dns_cache.SimpleDNSCache.TRUST_GLUE)
+ if (rrset is not None and
+ (not active_only or
+ not self.__cache.get(id).is_expired(self.__now))):
+ return rcode, rrset, id
+
+ return None, None, None
+
class QueryReplay:
CACHE_OPTIONS = dns_cache.SimpleDNSCache.FIND_ALLOW_CNAME | \
dns_cache.SimpleDNSCache.FIND_ALLOW_NEGATIVE
- def __init__(self, log_file, cache):
+ def __init__(self, log_file, cache, dbg_level):
self.__log_file = log_file
self.__cache = cache
# Replay result.
@@ -110,8 +364,25 @@ class QueryReplay:
self.__queries = {}
self.__total_queries = 0
self.__query_params = None
+ self.__cur_query = None # used for debug output
+ self.__dbg_level = int(dbg_level)
self.__resp_msg = Message(Message.RENDER) # for resp size estimation
self.__renderer = MessageRenderer()
+ self.__rcode_stat = {} # RCODE value (int) => query counter
+ self.__qtype_stat = {} # RR type => query counter
+
+ def dprint(self, level, msg, params=[]):
+ '''Dump a debug/log message.'''
+ if self.__dbg_level < level:
+ return
+ date_time = str(datetime.datetime.today())
+ postfix = ''
+ if self.__cur_query is not None:
+ postfix = ' [%s/%s/%s]' % \
+ (self.__cur_query[0], self.__cur_query[1], self.__cur_query[2])
+ sys.stdout.write(('%s ' + msg + '%s\n') %
+ tuple([date_time] + [str(p) for p in params] +
+ [postfix]))
def replay(self):
with open(self.__log_file) as f:
@@ -120,8 +391,8 @@ class QueryReplay:
try:
self.__replay_query(log_line)
except Exception as ex:
- sys.stderr.write('error (' + str(ex) + ') at line: ' +
- log_line)
+ self.dprint(LOGLVL_INFO,
+ 'error (%s) at line: %s', [ex, log_line])
raise ex
return self.__total_queries, len(self.__queries)
@@ -129,7 +400,7 @@ class QueryReplay:
'''Replay a single query.'''
m = re.match(RE_LOGLINE, log_line)
if not m:
- sys.stderr.write('unexpected line: ' + log_line)
+ self.dprint(LOGLVL_INFO, 'unexpected line: %s', [log_line])
return
qry_time = float(m.group(1))
client_addr = m.group(2)
@@ -138,22 +409,78 @@ class QueryReplay:
qry_type = RRType(convert_rrtype(m.group(5)))
qry_key = (qry_name, qry_type, qry_class)
+ self.__cur_query = (qry_name, qry_class, qry_type)
+
+ if not qry_type in self.__qtype_stat:
+ self.__qtype_stat[qry_type] = 0
+ self.__qtype_stat[qry_type] += 1
+
qinfo = self.__queries.get(qry_key)
if qinfo is None:
qinfo, rrsets = \
self.__get_query_trace(qry_name, qry_class, qry_type)
qinfo.resp_size = self.__calc_resp_size(qry_name, rrsets)
self.__queries[qry_key] = qinfo
- if qinfo.cache_expired(self.__cache, qry_time):
+ if not qinfo.rcode.get_code() in self.__rcode_stat:
+ self.__rcode_stat[qinfo.rcode.get_code()] = 0
+ self.__rcode_stat[qinfo.rcode.get_code()] += 1
+ expired, resp_list = \
+ self.__check_expired(qinfo, qry_name, qry_class, qry_type,
+ qry_time)
+ if expired:
+ self.dprint(LOGLVL_DEBUG3,
+ 'cache miss, updated with %s messages (%s)',
+ [len(resp_list), resp_list])
cache_log = CacheLog(qry_time)
qinfo.add_cache_log(cache_log)
else:
+ self.dprint(LOGLVL_DEBUG3, 'cache hit')
cache_log = qinfo.get_last_cache()
if cache_log is None:
cache_log = CacheLog(qry_time, False)
qinfo.add_cache_log(cache_log)
cache_log.hits += 1
+ def __check_expired(self, qinfo, qname, qclass, qtype, now):
+ if qtype == RRType.ANY():
+ return self.__check_expired_any(qinfo, qname, qclass, now)
+ is_cname_qry = qtype == RRType.CNAME()
+ expired = False
+ ret_resp_list = []
+ for trace in [qinfo] + qinfo.cname_trace:
+ res_ctx = ResolverContext(qname, qclass, qtype, self.__cache, now,
+ self.__dbg_level)
+ rrset, res_updated, resp_list = res_ctx.resolve()
+ ret_resp_list.extend(resp_list)
+ if res_updated:
+ expired = True
+ if (rrset is not None and not is_cname_qry and
+ rrset.get_type() == RRType.CNAME()):
+ qname = Name(rrset.get_rdata()[0].to_text())
+ return expired, ret_resp_list
+
+ def __check_expired_any(self, qinfo, qname, qclass, now):
+ rcode, val = self.__cache.find_all(qname, qclass, self.CACHE_OPTIONS)
+ rrsets = self.__get_type_any_info(rcode, val)[2]
+ if rcode != Rcode.NOERROR():
+ res_ctx = ResolverContext(qname, qclass, RRType.ANY(),
+ self.__cache, now, self.__dbg_level)
+ _, res_updated, resp_list = res_ctx.resolve()
+ return res_updated, resp_list
+ for rrset in rrsets:
+ res_ctx = ResolverContext(rrset.get_name(), qclass,
+ rrset.get_type(), self.__cache, now,
+ self.__dbg_level)
+ _, res_updated, resp_list = res_ctx.resolve()
+ if res_updated:
+ # If one of the RRsets has expired and been updated, we would
+ # have updated all of the "ANY" result. So don't bother to
+ # replay for the rest; just update the entire cache and
+ # we are done.
+ qinfo.update(self.__cache, now)
+ return True, resp_list
+ return False, []
+
def __calc_resp_size(self, qry_name, rrsets):
self.__renderer.clear()
self.__resp_msg.clear(Message.RENDER)
@@ -178,12 +505,12 @@ class QueryReplay:
rcode, val = self.__cache.find_all(qry_name, qry_class,
self.CACHE_OPTIONS)
ttl, ids, rrsets = self.__get_type_any_info(rcode, val)
- return QueryTrace(ttl, ids), rrsets
+ return QueryTrace(ttl, ids, rcode), rrsets
rcode, rrset, id = self.__cache.find(qry_name, qry_class, qry_type,
self.CACHE_OPTIONS)
# Same for type CNAME query or when it's not CNAME substitution.
- qtrace = QueryTrace(rrset.get_ttl().get_value(), [id])
+ qtrace = QueryTrace(rrset.get_ttl().get_value(), [id], rcode)
if qry_type == RRType.CNAME() or rrset.get_type() != RRType.CNAME():
return qtrace, [rrset]
@@ -205,17 +532,14 @@ class QueryReplay:
break
rcode, rrset, id = self.__cache.find(cname, qry_class, qry_type,
self.CACHE_OPTIONS)
- try:
- chain_trace.append(QueryTrace(rrset.get_ttl().get_value(),
- [id]))
- cnames.append(cname)
- resp_rrsets.append(rrset)
- rrtype = rrset.get_type()
- except Exception as ex:
- sys.stderr.write('CNAME trace failed for %s/%s/%s at %s\n' %
- (qry_name, qry_class, qry_type, cname))
- break
+ chain_trace.append(QueryTrace(rrset.get_ttl().get_value(), [id],
+ rcode))
+ cnames.append(cname)
+ resp_rrsets.append(rrset)
+ rrtype = rrset.get_type()
qtrace.cname_trace = chain_trace
+ # We return the RCODE for the end of the chain
+ qtrace.rcode = rcode
return qtrace, resp_rrsets
def __get_type_any_info(self, rcode, find_val):
@@ -268,28 +592,55 @@ class QueryReplay:
qry_param[2], qry_param[0],
qry_param[1]))
+ def dump_rcode_stat(self):
+ print('\nPer RCODE statistics:')
+ rcodes = list(self.__rcode_stat.keys())
+ rcodes.sort(key=lambda x: -self.__rcode_stat[x])
+ for rcode_val in rcodes:
+ print('%s: %d' % (Rcode(rcode_val), self.__rcode_stat[rcode_val]))
+
+ def dump_qtype_stat(self):
+ print('\nPer Query Type statistics:')
+ qtypes = list(self.__qtype_stat.keys())
+ qtypes.sort(key=lambda x: -self.__qtype_stat[x])
+ for qtype in qtypes:
+ print('%s: %d' % (qtype, self.__qtype_stat[qtype]))
+
def main(log_file, options):
cache = dns_cache.SimpleDNSCache()
cache.load(options.cache_dbfile)
- replay = QueryReplay(log_file, cache)
+ replay = QueryReplay(log_file, cache, options.dbg_level)
total_queries, uniq_queries = replay.replay()
print('Replayed %d queries (%d unique)' % (total_queries, uniq_queries))
if options.popularity_file is not None:
replay.dump_popularity_stat(options.popularity_file)
if options.query_dump_file is not None:
replay.dump_queries(options.query_dump_file)
+ if options.dump_rcode_stat:
+ replay.dump_rcode_stat()
+ if options.dump_qtype_stat:
+ replay.dump_qtype_stat()
def get_option_parser():
parser = OptionParser(usage='usage: %prog [options] log_file')
parser.add_option("-c", "--cache-dbfile",
dest="cache_dbfile", action="store", default=None,
help="Serialized DNS cache DB")
+ parser.add_option("-d", "--dbg-level", dest="dbg_level", action="store",
+ default=0,
+ help="specify the verbosity level of debug output")
parser.add_option("-p", "--dump-popularity",
dest="popularity_file", action="store",
help="dump statistics per query popularity")
parser.add_option("-q", "--dump-queries",
dest="query_dump_file", action="store",
help="dump unique queries")
+ parser.add_option("-r", "--dump-rcode-stat",
+ dest="dump_rcode_stat", action="store_true",
+ default=False, help="dump per RCODE statistics")
+ parser.add_option("-t", "--dump-qtype-stat",
+ dest="dump_qtype_stat", action="store_true",
+ default=False, help="dump per type statistics")
return parser
if __name__ == "__main__":
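For orientation, ResolverContext.resolve() above works off a pre-recorded delegation chain: chain[-1] is the closest zone cut whose NS entry is still active, and each step back toward the answer counts as one emulated external query and re-stamps the corresponding cache entry. A condensed, hypothetical sketch (ignoring NS-address fetches and response-size bookkeeping):

def emulate_resolve(chain, cache, now):
    '''chain: [answer_id, ns_id_1, ..., ns_id_n], from the query name up to
    the closest still-active zone cut; cache maps entry id -> update time.'''
    if len(chain) == 1:
        return 0                  # the answer itself is active: cache hit
    queries = 0
    while len(chain) > 1:
        queries += 1              # one emulated external query per level
        chain = chain[:-1]
        cache[chain[-1]] = now    # the next-deeper entry is fresh again
    return queries

cache = {}
assert emulate_resolve(['ans'], cache, 100) == 0
assert emulate_resolve(['ans', 'ns.example', 'ns.root'], cache, 100) == 2
assert cache == {'ns.example': 100, 'ans': 100}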