BIND 10 trac2370, updated. 33fd1fb0ed8ebfceb01e8a84b1196af29dffde92 [2370] overall documentation updates

Sat Oct 27 00:12:26 UTC 2012

The branch, trac2370 has been updated
       via  33fd1fb0ed8ebfceb01e8a84b1196af29dffde92 (commit)
       via  844348f6a9c5d4670f96750a2c94ade6778891d9 (commit)
       via  57b907eddc75241956102dd4de70d882c1b1fe63 (commit)
       via  a311ab52fe83d602a2ac3daa12314df7de258bae (commit)
       via  1b63f7d9df1621053c71ec3ef546a8cae024dffb (commit)
      from  a54864927bc88eebbfb0f9515207ba9a3cacaa3e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 33fd1fb0ed8ebfceb01e8a84b1196af29dffde92
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Oct 26 17:11:26 2012 -0700

    [2370] overall documentation updates

commit 844348f6a9c5d4670f96750a2c94ade6778891d9
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Oct 26 16:01:34 2012 -0700

    [2370] separate error type token to support error code

commit 57b907eddc75241956102dd4de70d882c1b1fe63
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Oct 26 14:39:53 2012 -0700

    [2370] introduced error codes for ERROR-type token

commit a311ab52fe83d602a2ac3daa12314df7de258bae
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Oct 26 14:23:16 2012 -0700

    [2370] cleanup: move the def of Token class out of Lexer for readability.

commit 1b63f7d9df1621053c71ec3ef546a8cae024dffb
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Fri Oct 26 11:37:58 2012 -0700

    [2370] added some more tests, comment fixes

-----------------------------------------------------------------------

Summary of changes:
 src/lib/dns/Makefile.am                            |    2 +-
 .../unittests/resource.cc => dns/master_lexer.cc}  |   36 ++-
 src/lib/dns/master_lexer.h                         |  247 +++++++++++++++-----
 src/lib/dns/tests/master_lexer_token_unittest.cc   |   76 ++++--
 4 files changed, 277 insertions(+), 84 deletions(-)
 copy src/lib/{util/unittests/resource.cc => dns/master_lexer.cc} (51%)

-----------------------------------------------------------------------

diff --git a/src/lib/dns/Makefile.am b/src/lib/dns/Makefile.am
index f1c2d08..5cf0732 100644
--- a/src/lib/dns/Makefile.am
+++ b/src/lib/dns/Makefile.am
@@ -95,7 +95,7 @@ libb10_dns___la_SOURCES += edns.h edns.cc
 libb10_dns___la_SOURCES += exceptions.h exceptions.cc
 libb10_dns___la_SOURCES += labelsequence.h labelsequence.cc
 libb10_dns___la_SOURCES += masterload.h masterload.cc
-libb10_dns___la_SOURCES += master_lexer.h
+libb10_dns___la_SOURCES += master_lexer.h master_lexer.cc
 libb10_dns___la_SOURCES += message.h message.cc
 libb10_dns___la_SOURCES += messagerenderer.h messagerenderer.cc
 libb10_dns___la_SOURCES += name.h name.cc
diff --git a/src/lib/dns/master_lexer.cc b/src/lib/dns/master_lexer.cc
new file mode 100644
index 0000000..2a5c886
--- /dev/null
+++ b/src/lib/dns/master_lexer.cc
@@ -0,0 +1,47 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <dns/master_lexer.h>
+
+#include <cassert>
+#include <string>
+
+namespace {
+const char* const error_text[] = {
+    "lexer not started",        // NOT_STARTED
+    "unbalanced parentheses",   // UNBALANCED_PAREN
+    "unexpected end of input",  // UNEXPECTED_END
+    "unbalanced quotes"         // UNBALANCED_QUOTES
+};
+const size_t error_text_max_count = sizeof(error_text) / sizeof(error_text[0]);
+}
+
+namespace isc {
+namespace dns {
+
+std::string
+MasterLexer::Token::getErrorText() const {
+    if (type_ != ERROR) {
+        isc_throw(InvalidOperation,
+                  "Token::getErrorText() for non error type");
+    }
+
+    // The class integrity ensures the following:
+    assert(val_.error_code_ < error_text_max_count);
+    return (error_text[val_.error_code_]);
+}
+
+
+} // end of namespace dns
+} // end of namespace isc
diff --git a/src/lib/dns/master_lexer.h b/src/lib/dns/master_lexer.h
index 5af2c06..eb37ba3 100644
--- a/src/lib/dns/master_lexer.h
+++ b/src/lib/dns/master_lexer.h
@@ -26,70 +26,205 @@ namespace dns {
 
 class MasterLexer {
 public:
-    class Token {
-    public:
-        enum Type {
-            ERROR,
-            END_OF_LINE,
-            END_OF_FILE,
-            INITIAL_WS,
-            STRING,
-            QSTRING,
-            NUMBER
-        };
-
-        struct StringRegion {
-            const char* beg;
-            size_t len;
-        };
-
-        explicit Token(Type type) : type_(type) {
-            if (type >= STRING) {
-                isc_throw(InvalidParameter, "Token per-type constructor "
-                          "called with invalid type: " << type);
-            }
-        }
-        Token(const char* str_beg, size_t str_len, bool quoted = false) :
-            type_(quoted ? QSTRING : STRING)
-        {
-            val_.str_region_.beg = str_beg;
-            val_.str_region_.len = str_len;
+    class Token;       // we define it separate for better readability
+};
+
+/// \brief Tokens for \c MasterLexer
+///
+/// This is a simple value-class encapsulating a type of a lexer token and
+/// (if it has a value) its value.  Essentially, the class provides
+/// constructors corresponding to different types of tokens, and corresponding
+/// getter methods.  The type and value are fixed at the time of construction
+/// and will never be modified throughout the lifetime of the object.
+/// The getter methods are still provided to maximize the safety; an
+/// application cannot refer to a value that is invalid for the type of token.
+///
+/// This class is intentionally implemented as copyable and assignable
+/// (using the default version of copy constructor and assignment operator),
+/// but it's mainly for internal implementation convenience.  Applications will
+/// simply refer to Token object as a reference via the \c MasterLexer class.
+class MasterLexer::Token {
+public:
+    /// \brief Enumeration for token types
+    enum Type {
+        END_OF_LINE, ///< End of line detected (if asked for detecting it)
+        END_OF_FILE, ///< End of file detected (if asked for detecting it)
+        INITIAL_WS,  ///< White spaces at the beginning of a line
+        NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
+                                       /// no-value (type only) types.
+                                       /// Mainly for internal use.
+        STRING, ///< A single string
+        QSTRING, ///< A single string quoted by double-quotes (").
+        NUMBER,  ///< A decimal number (unsigned 32-bit)
+        ERROR    ///< Error detected in getting a token
+    };
+
+    /// \brief Enumeration for lexer error codes
+    enum ErrorCode {
+        NOT_STARTED, ///< The lexer is just initialized and has no token
+        UNBALANCED_PAREN,       ///< Unbalanced parentheses detected
+        UNEXPECTED_END, ///< The lexer reaches the end of line or file
+                       /// unexpectedly
+        UNBALANCED_QUOTES,      ///< Unbalanced quotations detected
+        MAX_ERROR_CODE ///< Max integer corresponding to valid error codes.
+                       /// (excluding this one). Mainly for internal use.
+    };
+
+    /// \brief A simple representation of a range of a string.
+    ///
+    /// This is a straightforward pair of the start pointer of a string
+    /// and its length.  The \c STRING and \c QSTRING types of tokens
+    /// will be primarily represented in this form.
+    ///
+    /// Any character can be stored in the valid range of the region.
+    /// In particular, there can be a nul character (\0) in the middle of
+    /// the region.  On the other hand, it is not ensured that the string
+    /// is nul-terminated.  So the usual string manipulation API may not work
+    /// as expected.
+    struct StringRegion {
+        const char* beg;        ///< The start address of the string
+        size_t len;             ///< The length of the string in bytes
+    };
+
+    /// \brief Constructor for non-value type of token.
+    ///
+    /// \throw InvalidParameter A value type token is specified.
+    /// \param type The type of the token.  It must indicate a non-value
+    /// type (not larger than \c NOVALUE_TYPE_MAX).
+    explicit Token(Type type) : type_(type) {
+        if (type > NOVALUE_TYPE_MAX) {
+            isc_throw(InvalidParameter, "Token per-type constructor "
+                      "called with invalid type: " << type);
         }
-        explicit Token(uint32_t number) : type_(NUMBER) {
-            val_.number_ = number;
+    }
+
+    /// \brief Constructor for string and quoted-string types of token.
+    ///
+    /// The optional \c quoted parameter specifies whether it's a quoted or
+    /// non quoted string.
+    ///
+    /// The string is specified as a pair of a pointer to the start address
+    /// and its length.  Any character can be contained in any position of
+    /// the valid range (see \c StringRegion).
+    ///
+    /// When it's a quoted string, the quotation marks must be excluded
+    /// from the specified range.
+    ///
+    /// \param str_beg The start address of the string
+    /// \param str_len The size of the string in bytes
+    /// \param quoted true if it's a quoted string; false otherwise.
+    Token(const char* str_beg, size_t str_len, bool quoted = false) :
+        type_(quoted ? QSTRING : STRING)
+    {
+        val_.str_region_.beg = str_beg;
+        val_.str_region_.len = str_len;
+    }
+
+    /// \brief Constructor for number type of token.
+    ///
+    /// \param number An unsigned 32-bit integer corresponding to the token
+    /// value.
+    explicit Token(uint32_t number) : type_(NUMBER) {
+        val_.number_ = number;
+    }
+
+    /// \brief Constructor for error type of token.
+    ///
+    /// \throw InvalidParameter Invalid error code value is specified.
+    /// \param error_code A pre-defined constant of \c ErrorCode.
+    explicit Token(ErrorCode error_code) : type_(ERROR) {
+        if (!(error_code < MAX_ERROR_CODE)) {
+            isc_throw(InvalidParameter, "Invalid master lexer error code: "
+                      << error_code);
         }
+        val_.error_code_ = error_code;
+    }
 
-        Type getType() const { return (type_); }
-        std::string getString() const {
-            if (type_ != STRING && type_ != QSTRING) {
-                isc_throw(InvalidOperation,
-                          "Token::getString() for non string-variant type");
-            }
-            return (std::string(val_.str_region_.beg,
-                                val_.str_region_.beg + val_.str_region_.len));
+    /// \brief Return the token type.
+    ///
+    /// \throw none
+    Type getType() const { return (type_); }
+
+    /// \brief Return the value of a string-variant token.
+    ///
+    /// \throw InvalidOperation Called on a non string-variant type of token.
+    /// \return A reference to \c StringRegion corresponding to the string
+    ///         token value.
+    const StringRegion& getStringRegion() const {
+        if (type_ != STRING && type_ != QSTRING) {
+            isc_throw(InvalidOperation,
+                      "Token::getStringRegion() for non string-variant type");
         }
-        const StringRegion& getStringRegion() const {
-            if (type_ != STRING && type_ != QSTRING) {
-                isc_throw(InvalidOperation,
-                          "Token::getString() for non string-variant type");
-            }
-            return (val_.str_region_);
+        return (val_.str_region_);
+    }
+
+    /// \brief Return the value of a string-variant token as a string object.
+    ///
+    /// Note that the underlying string may contain a nul (\0) character
+    /// in the middle.  The returned string object will contain all characters
+    /// of the valid range of the underlying string.  So some string
+    /// operations such as c_str() may not work as expected.
+    ///
+    /// \throw InvalidOperation Called on a non string-variant type of token.
+    /// \throw std::bad_alloc Resource allocation failure in constructing the
+    ///                       string object.
+    /// \return A std::string object corresponding to the string token value.
+    std::string getString() const {
+        if (type_ != STRING && type_ != QSTRING) {
+            isc_throw(InvalidOperation,
+                      "Token::getString() for non string-variant type");
         }
-        uint32_t getNumber() const {
-            if (type_ != NUMBER) {
-                isc_throw(InvalidOperation,
-                          "Token::getNumber() for non number type");
-            }
-            return (val_.number_);
+        return (std::string(val_.str_region_.beg,
+                            val_.str_region_.beg + val_.str_region_.len));
+    }
+
+    /// \brief Return the value of a number type token.
+    ///
+    /// \throw InvalidOperation Called on a non number type of token.
+    /// \return The integer corresponding to the number token value.
+    uint32_t getNumber() const {
+        if (type_ != NUMBER) {
+            isc_throw(InvalidOperation,
+                      "Token::getNumber() for non number type");
         }
+        return (val_.number_);
+    }
 
-    private:
-        Type type_;
-        union {
-            StringRegion str_region_;
-            uint32_t number_;
-        } val_;
+    /// \brief Return the error code of an error type token.
+    ///
+    /// \throw InvalidOperation Called on a non error type of token.
+    /// \return The error code of the token.
+    ErrorCode getErrorCode() const {
+        if (type_ != ERROR) {
+            isc_throw(InvalidOperation,
+                      "Token::getErrorCode() for non error type");
+        }
+        return (val_.error_code_);
     };
+
+    /// \brief Return a textual description of the error of an error token.
+    ///
+    /// The returned string would be useful to produce a log message when
+    /// a zone file parser encounters an error.
+    ///
+    /// \throw InvalidOperation Called on a non error type of token.
+    /// \throw std::bad_alloc Resource allocation failure in constructing the
+    ///                       string object.
+    /// \return A string object that describes the meaning of the error.
+    std::string getErrorText() const;
+
+private:
+    Type type_;    // this is not const so the class can be assignable
+
+    // We use a union to represent different types of token values via the
+    // unified Token class.  The class integrity should ensure valid operation
+    // on the union; getter methods should only refer to the member set at
+    // the construction.
+    union {
+        StringRegion str_region_;
+        uint32_t number_;
+        ErrorCode error_code_;
+    } val_;
 };
 
 } // namespace dns
diff --git a/src/lib/dns/tests/master_lexer_token_unittest.cc b/src/lib/dns/tests/master_lexer_token_unittest.cc
index a43d6e3..a7a8acc 100644
--- a/src/lib/dns/tests/master_lexer_token_unittest.cc
+++ b/src/lib/dns/tests/master_lexer_token_unittest.cc
@@ -25,28 +25,41 @@ using namespace isc::dns;
 namespace {
 
 const char TEST_STRING[] = "string token";
+// This excludes the ending \0 character
+const size_t TEST_STRING_LEN = sizeof(TEST_STRING) - 1;
 
 class MasterLexerTokenTest : public ::testing::Test {
 public:
     MasterLexerTokenTest() :
-        token_err(MasterLexer::Token::ERROR),
-        token_str(TEST_STRING, sizeof(TEST_STRING) - 1), // excluding ending 0
-        token_num(42)
+        token_eof(MasterLexer::Token::END_OF_FILE),
+        token_str(TEST_STRING, TEST_STRING_LEN),
+        token_num(42),
+        token_err(MasterLexer::Token::UNEXPECTED_END)
     {}
 
-    const MasterLexer::Token token_err;
+    const MasterLexer::Token token_eof; // an example of non-value type token
     const MasterLexer::Token token_str;
     const MasterLexer::Token token_num;
+    const MasterLexer::Token token_err;
 };
 
 
 TEST_F(MasterLexerTokenTest, strings) {
+    // basic construction and getter checks
     EXPECT_EQ(MasterLexer::Token::STRING, token_str.getType());
     EXPECT_EQ(std::string("string token"), token_str.getString());
     const MasterLexer::Token::StringRegion str_region =
         token_str.getStringRegion();
     EXPECT_EQ(TEST_STRING, str_region.beg);
-    EXPECT_EQ(sizeof(TEST_STRING) - 1, str_region.len);
+    EXPECT_EQ(TEST_STRING_LEN, str_region.len);
+
+    // Even if the stored string contains a nul character (in this case,
+    // it happens to be at the end of the string, but could be in the middle),
+    // getString() should return a string object containing the nul.
+    std::string expected_str("string token");
+    expected_str.push_back('\0');
+    EXPECT_EQ(expected_str,
+              MasterLexer::Token(TEST_STRING, TEST_STRING_LEN + 1).getString());
 
     // Construct type of qstring
     EXPECT_EQ(MasterLexer::Token::QSTRING,
@@ -58,9 +71,9 @@ TEST_F(MasterLexerTokenTest, strings) {
               getType());
 
     // getString/StringRegion() aren't allowed for non string(-variant) types
-    EXPECT_THROW(token_err.getString(), isc::InvalidOperation);
+    EXPECT_THROW(token_eof.getString(), isc::InvalidOperation);
     EXPECT_THROW(token_num.getString(), isc::InvalidOperation);
-    EXPECT_THROW(token_err.getStringRegion(), isc::InvalidOperation);
+    EXPECT_THROW(token_eof.getStringRegion(), isc::InvalidOperation);
     EXPECT_THROW(token_num.getStringRegion(), isc::InvalidOperation);
 }
 
@@ -78,24 +91,23 @@ TEST_F(MasterLexerTokenTest, numbers) {
     EXPECT_EQ(MasterLexer::Token::NUMBER, token.getType());
 
     // it's okay to replace it with a different type of token
-    token = token_err;
-    EXPECT_EQ(MasterLexer::Token::ERROR, token.getType());
+    token = token_eof;
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token.getType());
 
     // Possible max value
     token = MasterLexer::Token(0xffffffff);
     EXPECT_EQ(4294967295u, token.getNumber());
 
-    // TBD: getNumber for other type
-    EXPECT_THROW(token_err.getNumber(), isc::InvalidOperation);
+    // getNumber() isn't allowed for non number types
+    EXPECT_THROW(token_eof.getNumber(), isc::InvalidOperation);
+    EXPECT_THROW(token_str.getNumber(), isc::InvalidOperation);
 }
 
-TEST_F(MasterLexerTokenTest, specials) {
+TEST_F(MasterLexerTokenTest, novalues) {
     // Just checking we can construct them and getType() returns correct value.
-    EXPECT_EQ(MasterLexer::Token::ERROR, token_err.getType());
+    EXPECT_EQ(MasterLexer::Token::END_OF_FILE, token_eof.getType());
     EXPECT_EQ(MasterLexer::Token::END_OF_LINE,
               MasterLexer::Token(MasterLexer::Token::END_OF_LINE).getType());
-    EXPECT_EQ(MasterLexer::Token::END_OF_FILE,
-              MasterLexer::Token(MasterLexer::Token::END_OF_FILE).getType());
     EXPECT_EQ(MasterLexer::Token::INITIAL_WS,
               MasterLexer::Token(MasterLexer::Token::INITIAL_WS).getType());
 
@@ -106,5 +118,39 @@ TEST_F(MasterLexerTokenTest, specials) {
                  isc::InvalidParameter);
     EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::NUMBER),
                  isc::InvalidParameter);
+    EXPECT_THROW(MasterLexer::Token t(MasterLexer::Token::ERROR),
+                 isc::InvalidParameter);
+}
+
+TEST_F(MasterLexerTokenTest, errors) {
+    EXPECT_EQ(MasterLexer::Token::ERROR, token_err.getType());
+    EXPECT_EQ(MasterLexer::Token::UNEXPECTED_END, token_err.getErrorCode());
+    EXPECT_EQ("unexpected end of input", token_err.getErrorText());
+    EXPECT_EQ("lexer not started",
+              MasterLexer::Token(MasterLexer::Token::NOT_STARTED).
+              getErrorText());
+    EXPECT_EQ("unbalanced parentheses",
+              MasterLexer::Token(MasterLexer::Token::UNBALANCED_PAREN).
+              getErrorText());
+    EXPECT_EQ("unbalanced quotes",
+              MasterLexer::Token(MasterLexer::Token::UNBALANCED_QUOTES).
+              getErrorText());
+
+    // getErrorCode/Text() isn't allowed for non error types
+    EXPECT_THROW(token_num.getErrorCode(), isc::InvalidOperation);
+    EXPECT_THROW(token_num.getErrorText(), isc::InvalidOperation);
+
+    // Only the pre-defined error code is accepted.  Hardcoding '4' (max code
+    // + 1) is intentional; it'd be actually better if we notice it when we
+    // update the enum list (which shouldn't happen too often).
+    EXPECT_THROW(MasterLexer::Token(MasterLexer::Token::ErrorCode(4)),
+                 isc::InvalidParameter);
+
+    // Check the coexistence of "from number" and "from error-code"
+    // constructors won't cause confusion.
+    EXPECT_EQ(MasterLexer::Token::NUMBER,
+              MasterLexer::Token(static_cast<uint32_t>(
+                                     MasterLexer::Token::NOT_STARTED)).
+              getType());
 }
 }