BIND 10 trac1603bench, updated. 2c5c832d39a85ded50864f85dbf94d3f8d076fdc [1603bench] Merge branch 'trac1603' into trac1603bench

Fri Mar 9 18:46:59 UTC 2012

The branch, trac1603bench has been updated
       via  2c5c832d39a85ded50864f85dbf94d3f8d076fdc (commit)
       via  0b55e11e55f2422e2aa1f920f48debac92d826af (commit)
       via  c3962c4f4d5f08e3ff194132a6308dbede212997 (commit)
       via  80617e3d1ea1fc032b2f676ed7b5db4fcdeed964 (commit)
       via  91a89cb8cac804441d957430593c43f40645a44d (commit)
       via  da35123462582bebd7700bbd5a427954d8b73de5 (commit)
       via  8123c01dbd4f71cdff0ba73a45343ff51de0908e (commit)
       via  914212e06d69ae4e24ac673b050dd60a7eadfbae (commit)
      from  b3284a520ea8bcd5dfb32491eeecf2899d27b7e4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 2c5c832d39a85ded50864f85dbf94d3f8d076fdc
Merge: 80617e3 0b55e11
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:45:42 2012 -0800

    [1603bench] Merge branch 'trac1603' into trac1603bench

commit 80617e3d1ea1fc032b2f676ed7b5db4fcdeed964
Merge: da35123 91a89cb
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:19:14 2012 -0800

    [1603bench] Merge branch 'trac1603' into trac1603bench

commit da35123462582bebd7700bbd5a427954d8b73de5
Merge: b3284a5 8123c01
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:15:22 2012 -0800

    [1603bench] Merge branch 'trac1603' into trac1603bench

-----------------------------------------------------------------------

Summary of changes:
 src/lib/dns/messagerenderer.cc |   57 ++++++++++++++++++++++-----------------
 src/lib/dns/name_internal.h    |    6 ++++
 src/lib/util/buffer.h          |    2 +-
 3 files changed, 39 insertions(+), 26 deletions(-)

-----------------------------------------------------------------------

diff --git a/src/lib/dns/messagerenderer.cc b/src/lib/dns/messagerenderer.cc
index 7591b03..1ac5cdf 100644
--- a/src/lib/dns/messagerenderer.cc
+++ b/src/lib/dns/messagerenderer.cc
@@ -19,6 +19,7 @@
 #include <dns/labelsequence.h>
 #include <dns/messagerenderer.h>
 
+#include <boost/array.hpp>
 #include <boost/static_assert.hpp>
 
 #include <limits>
@@ -42,9 +43,14 @@ namespace {     // hide internal-only names from the public namespaces
 /// longest match (ancestor) name against each new name to be rendered into
 /// the buffer.
 struct OffsetItem {
-    OffsetItem(size_t pos, size_t len) : pos_(pos), len_(len)
+    OffsetItem(size_t hash, size_t pos, size_t len) :
+        hash_(hash), pos_(pos), len_(len)
     {}
 
+    /// The hash value for the stored name calculated by LabelSequence.getHash.
+    /// This will help make name comparison in \c NameCompare more efficient.
+    size_t hash_;
+
     /// The position (offset from the beginning) in the buffer where the
     /// name starts.
     uint16_t pos_;
@@ -68,12 +74,16 @@ struct NameCompare {
     /// \param buffer The buffer for rendering used in the caller renderer
     /// \param name_buf An input buffer storing the wire-format data of the
     /// name to be newly rendered (and only that data).
-    NameCompare(const OutputBuffer& buffer, InputBuffer& name_buf) :
-        buffer_(&buffer), name_buf_(&name_buf)
+    /// \param hash The hash value for the name.
+    NameCompare(const OutputBuffer& buffer, InputBuffer& name_buf,
+                size_t hash) :
+        buffer_(&buffer), name_buf_(&name_buf), hash_(hash)
     {}
 
     bool operator()(const OffsetItem& item) const {
-        if (item.len_ != name_buf_->getLength()) {
+        // Trivial inequality check.  If either the hash or the total length
+        // doesn't match, the names are obviously different.
+        if (item.hash_  != hash_ || item.len_ != name_buf_->getLength()) {
             return (false);
         }
 
@@ -134,6 +144,7 @@ private:
 
     const OutputBuffer* buffer_;
     InputBuffer* name_buf_;
+    const size_t hash_;
 };
 }
 
@@ -167,24 +178,24 @@ struct MessageRenderer::MessageRendererImpl {
         }
     }
 
-    uint16_t findOffset(const OutputBuffer& buffer,
-                        InputBuffer& name_buf,
-                        size_t bucket_id, bool case_sensitive)
+    uint16_t findOffset(const OutputBuffer& buffer, InputBuffer& name_buf,
+                        size_t hash, bool case_sensitive) const
     {
         // Find a matching entry, if any.  We use some heuristics here: often
         // the same name appers consecutively (like repeating the same owner
         // name for a single RRset), so in case there's a collision in the
         // bucket it will be more likely to find it in the tail side of the
         // bucket.
+        const size_t bucket_id = hash % BUCKETS;
         vector<OffsetItem>::const_reverse_iterator found;
         if (case_sensitive) {
             found = find_if(table_[bucket_id].rbegin(),
                             table_[bucket_id].rend(),
-                            NameCompare<true>(buffer, name_buf));
+                            NameCompare<true>(buffer, name_buf, hash));
         } else {
             found = find_if(table_[bucket_id].rbegin(),
                             table_[bucket_id].rend(),
-                            NameCompare<false>(buffer, name_buf));
+                            NameCompare<false>(buffer, name_buf, hash));
         }
         if (found != table_[bucket_id].rend()) {
             return (found->pos_);
@@ -192,8 +203,8 @@ struct MessageRenderer::MessageRendererImpl {
         return (NO_OFFSET);
     }
 
-    void addOffset(size_t bucket_id, size_t offset, size_t len) {
-        table_[bucket_id].push_back(OffsetItem(offset, len));
+    void addOffset(size_t hash, size_t offset, size_t len) {
+        table_[hash % BUCKETS].push_back(OffsetItem(hash, offset, len));
     }
 
     // The hash table for the (offset + position in the buffer) entries
@@ -206,6 +217,9 @@ struct MessageRenderer::MessageRendererImpl {
     bool truncated_;
     /// The name compression mode.
     CompressMode compress_mode_;
+
+    // Placeholder for hash values as they are calculated in writeName().
+    boost::array<size_t, Name::MAX_LABELS> seq_hashes_;
 };
 
 MessageRenderer::MessageRenderer() :
@@ -280,13 +294,6 @@ MessageRenderer::writeName(const Name& name, const bool compress) {
     size_t data_len;
     const char* data;
 
-    // We store hash bucket ID for label sequences derived from the name
-    // in order to avoid calculating the hash twice.  The assert ensures
-    // uint8_t is sufficient for our table.
-    uint8_t bucket_ids[Name::MAX_LABELS];
-    BOOST_STATIC_ASSERT((1 << numeric_limits<uint8_t>::digits) >
-                        MessageRendererImpl::BUCKETS);
-
     // Find the offset in the offset table whose name gives the longest
     // match against the name to be rendered.
     size_t nlabels_uncomp;
@@ -299,12 +306,12 @@ MessageRenderer::writeName(const Name& name, const bool compress) {
             ++nlabels_uncomp;
             break;
         }
-        bucket_ids[nlabels_uncomp] =
-            (sequence.getHash(impl_->compress_mode_) %
-             MessageRendererImpl::BUCKETS);
+        // write with range check for safety
+        impl_->seq_hashes_.at(nlabels_uncomp) =
+            sequence.getHash(impl_->compress_mode_);
         InputBuffer name_buf(data, data_len);
         ptr_offset = impl_->findOffset(getBuffer(), name_buf,
-                                       bucket_ids[nlabels_uncomp],
+                                       impl_->seq_hashes_[nlabels_uncomp],
                                        case_sensitive);
         if (ptr_offset != MessageRendererImpl::NO_OFFSET) {
             break;
@@ -343,9 +350,9 @@ MessageRenderer::writeName(const Name& name, const bool compress) {
         if (offset > Name::MAX_COMPRESS_POINTER) {
             break;
         }
-        // Store the <offset, len> pair to the table.  We already know the
-        // hash value and the bucket ID derived from it.
-        impl_->addOffset(bucket_ids[i], offset, seqlen);
+        // Store the tuple of <hash, offset, len> to the table.  Note that we
+        // already know the hash value for each name.
+        impl_->addOffset(impl_->seq_hashes_[i], offset, seqlen);
         offset += (label_len + 1);
         seqlen -= (label_len + 1);
     }
diff --git a/src/lib/dns/name_internal.h b/src/lib/dns/name_internal.h
index 143ecb3..e1eab8c 100644
--- a/src/lib/dns/name_internal.h
+++ b/src/lib/dns/name_internal.h
@@ -21,6 +21,12 @@
 // MessageRenderer).  It's not expected to be used even by normal applications.
 // This header file is therefore not expected to be installed as part of the
 // library.
+//
+// Note: if it turns out that we need this shortcut for many other places
+// we may even want to expose it to other BIND 10 modules, but for now
+// we'll keep it semi-private (note also that except for very performance
+// sensitive applications the standard std::tolower() function should be just
+// sufficient).
 namespace isc {
 namespace dns {
 namespace name {
diff --git a/src/lib/util/buffer.h b/src/lib/util/buffer.h
index 0e91d2e..f27e941 100644
--- a/src/lib/util/buffer.h
+++ b/src/lib/util/buffer.h
@@ -225,7 +225,7 @@ private:
     /// \brief A common helper to throw an exception on invalid operation.
     ///
     /// Experiments showed that throwing from each method makes the buffer
-    /// operation thrower, so we consolidate it here, and let the methods
+    /// operation slower, so we consolidate it here, and let the methods
     /// call this.
     static void throwError(const char* msg) {
         isc_throw(InvalidBufferPosition, msg);