BIND 10 trac1603, updated. 0b55e11e55f2422e2aa1f920f48debac92d826af [1603] moved the array for hash to the impl class and use boost::array for it.

Fri Mar 9 18:46:53 UTC 2012

The branch, trac1603 has been updated
       via  0b55e11e55f2422e2aa1f920f48debac92d826af (commit)
       via  c3962c4f4d5f08e3ff194132a6308dbede212997 (commit)
       via  91a89cb8cac804441d957430593c43f40645a44d (commit)
       via  8123c01dbd4f71cdff0ba73a45343ff51de0908e (commit)
       via  914212e06d69ae4e24ac673b050dd60a7eadfbae (commit)
      from  b1fb64710cddf53857625d7a871f82775e20d93b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 0b55e11e55f2422e2aa1f920f48debac92d826af
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:44:21 2012 -0800

    [1603] moved the array for hash to the impl class and use boost::array for it.
    
    use array.at() for write to ensure safety.  it's a bit costly, but performance
    penalty doesn't seem to be big, so it should be a good tradeoff.

commit c3962c4f4d5f08e3ff194132a6308dbede212997
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:22:58 2012 -0800

    [1603] added some additional notes about maptolower array.

commit 91a89cb8cac804441d957430593c43f40645a44d
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:19:06 2012 -0800

    [1603] fixed a typo

commit 8123c01dbd4f71cdff0ba73a45343ff51de0908e
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:15:04 2012 -0800

    [1603] cleanup: constify a member function.

commit 914212e06d69ae4e24ac673b050dd60a7eadfbae
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Mar 9 10:13:32 2012 -0800

    [1603] store hash value for the name stored in OffsetItem.
    
    this may make comparison a bit more efficient.  suggested in review.

-----------------------------------------------------------------------

Summary of changes:
 src/lib/dns/messagerenderer.cc |   57 ++++++++++++++++++++++-----------------
 src/lib/dns/name_internal.h    |    6 ++++
 src/lib/util/buffer.h          |    2 +-
 3 files changed, 39 insertions(+), 26 deletions(-)

-----------------------------------------------------------------------

diff --git a/src/lib/dns/messagerenderer.cc b/src/lib/dns/messagerenderer.cc
index 7591b03..1ac5cdf 100644
--- a/src/lib/dns/messagerenderer.cc
+++ b/src/lib/dns/messagerenderer.cc
@@ -19,6 +19,7 @@
 #include <dns/labelsequence.h>
 #include <dns/messagerenderer.h>
 
+#include <boost/array.hpp>
 #include <boost/static_assert.hpp>
 
 #include <limits>
@@ -42,9 +43,14 @@ namespace {     // hide internal-only names from the public namespaces
 /// longest match (ancestor) name against each new name to be rendered into
 /// the buffer.
 struct OffsetItem {
-    OffsetItem(size_t pos, size_t len) : pos_(pos), len_(len)
+    OffsetItem(size_t hash, size_t pos, size_t len) :
+        hash_(hash), pos_(pos), len_(len)
     {}
 
+    /// The hash value for the stored name calculated by LabelSequence.getHash.
+    /// This will help make name comparison in \c NameCompare more efficient.
+    size_t hash_;
+
     /// The position (offset from the beginning) in the buffer where the
     /// name starts.
     uint16_t pos_;
@@ -68,12 +74,16 @@ struct NameCompare {
     /// \param buffer The buffer for rendering used in the caller renderer
     /// \param name_buf An input buffer storing the wire-format data of the
     /// name to be newly rendered (and only that data).
-    NameCompare(const OutputBuffer& buffer, InputBuffer& name_buf) :
-        buffer_(&buffer), name_buf_(&name_buf)
+    /// \param hash The hash value for the name.
+    NameCompare(const OutputBuffer& buffer, InputBuffer& name_buf,
+                size_t hash) :
+        buffer_(&buffer), name_buf_(&name_buf), hash_(hash)
     {}
 
     bool operator()(const OffsetItem& item) const {
-        if (item.len_ != name_buf_->getLength()) {
+        // Trivial inequality check.  If either the hash or the total length
+        // doesn't match, the names are obviously different.
+        if (item.hash_  != hash_ || item.len_ != name_buf_->getLength()) {
             return (false);
         }
 
@@ -134,6 +144,7 @@ private:
 
     const OutputBuffer* buffer_;
     InputBuffer* name_buf_;
+    const size_t hash_;
 };
 }
 
@@ -167,24 +178,24 @@ struct MessageRenderer::MessageRendererImpl {
         }
     }
 
-    uint16_t findOffset(const OutputBuffer& buffer,
-                        InputBuffer& name_buf,
-                        size_t bucket_id, bool case_sensitive)
+    uint16_t findOffset(const OutputBuffer& buffer, InputBuffer& name_buf,
+                        size_t hash, bool case_sensitive) const
     {
         // Find a matching entry, if any.  We use some heuristics here: often
         // the same name appers consecutively (like repeating the same owner
         // name for a single RRset), so in case there's a collision in the
         // bucket it will be more likely to find it in the tail side of the
         // bucket.
+        const size_t bucket_id = hash % BUCKETS;
         vector<OffsetItem>::const_reverse_iterator found;
         if (case_sensitive) {
             found = find_if(table_[bucket_id].rbegin(),
                             table_[bucket_id].rend(),
-                            NameCompare<true>(buffer, name_buf));
+                            NameCompare<true>(buffer, name_buf, hash));
         } else {
             found = find_if(table_[bucket_id].rbegin(),
                             table_[bucket_id].rend(),
-                            NameCompare<false>(buffer, name_buf));
+                            NameCompare<false>(buffer, name_buf, hash));
         }
         if (found != table_[bucket_id].rend()) {
             return (found->pos_);
@@ -192,8 +203,8 @@ struct MessageRenderer::MessageRendererImpl {
         return (NO_OFFSET);
     }
 
-    void addOffset(size_t bucket_id, size_t offset, size_t len) {
-        table_[bucket_id].push_back(OffsetItem(offset, len));
+    void addOffset(size_t hash, size_t offset, size_t len) {
+        table_[hash % BUCKETS].push_back(OffsetItem(hash, offset, len));
     }
 
     // The hash table for the (offset + position in the buffer) entries
@@ -206,6 +217,9 @@ struct MessageRenderer::MessageRendererImpl {
     bool truncated_;
     /// The name compression mode.
     CompressMode compress_mode_;
+
+    // Placeholder for hash values as they are calculated in writeName().
+    boost::array<size_t, Name::MAX_LABELS> seq_hashes_;
 };
 
 MessageRenderer::MessageRenderer() :
@@ -280,13 +294,6 @@ MessageRenderer::writeName(const Name& name, const bool compress) {
     size_t data_len;
     const char* data;
 
-    // We store hash bucket ID for label sequences derived from the name
-    // in order to avoid calculating the hash twice.  The assert ensures
-    // uint8_t is sufficient for our table.
-    uint8_t bucket_ids[Name::MAX_LABELS];
-    BOOST_STATIC_ASSERT((1 << numeric_limits<uint8_t>::digits) >
-                        MessageRendererImpl::BUCKETS);
-
     // Find the offset in the offset table whose name gives the longest
     // match against the name to be rendered.
     size_t nlabels_uncomp;
@@ -299,12 +306,12 @@ MessageRenderer::writeName(const Name& name, const bool compress) {
             ++nlabels_uncomp;
             break;
         }
-        bucket_ids[nlabels_uncomp] =
-            (sequence.getHash(impl_->compress_mode_) %
-             MessageRendererImpl::BUCKETS);
+        // write with range check for safety
+        impl_->seq_hashes_.at(nlabels_uncomp) =
+            sequence.getHash(impl_->compress_mode_);
         InputBuffer name_buf(data, data_len);
         ptr_offset = impl_->findOffset(getBuffer(), name_buf,
-                                       bucket_ids[nlabels_uncomp],
+                                       impl_->seq_hashes_[nlabels_uncomp],
                                        case_sensitive);
         if (ptr_offset != MessageRendererImpl::NO_OFFSET) {
             break;
@@ -343,9 +350,9 @@ MessageRenderer::writeName(const Name& name, const bool compress) {
         if (offset > Name::MAX_COMPRESS_POINTER) {
             break;
         }
-        // Store the <offset, len> pair to the table.  We already know the
-        // hash value and the bucket ID derived from it.
-        impl_->addOffset(bucket_ids[i], offset, seqlen);
+        // Store the tuple of <hash, offset, len> to the table.  Note that we
+        // already know the hash value for each name.
+        impl_->addOffset(impl_->seq_hashes_[i], offset, seqlen);
         offset += (label_len + 1);
         seqlen -= (label_len + 1);
     }
diff --git a/src/lib/dns/name_internal.h b/src/lib/dns/name_internal.h
index 143ecb3..e1eab8c 100644
--- a/src/lib/dns/name_internal.h
+++ b/src/lib/dns/name_internal.h
@@ -21,6 +21,12 @@
 // MessageRenderer).  It's not expected to be used even by normal applications.
 // This header file is therefore not expected to be installed as part of the
 // library.
+//
+// Note: if it turns out that we need this shortcut for many other places
+// we may even want to make it expose to other BIND 10 modules, but for now
+// we'll keep it semi-private (note also that except for very performance
+// sensitive applications the standard std::tolower() function should be just
+// sufficient).
 namespace isc {
 namespace dns {
 namespace name {
diff --git a/src/lib/util/buffer.h b/src/lib/util/buffer.h
index b50a5d8..a885db1 100644
--- a/src/lib/util/buffer.h
+++ b/src/lib/util/buffer.h
@@ -224,7 +224,7 @@ private:
     /// \brief A common helper to throw an exception on invalid operation.
     ///
     /// Experiments showed that throwing from each method makes the buffer
-    /// operation thrower, so we consolidate it here, and let the methods
+    /// operation slower, so we consolidate it here, and let the methods
     /// call this.
     static void throwError(const char* msg) {
         isc_throw(InvalidBufferPosition, msg);