BIND 10 master, updated. 9a11ef62ae36d9d891e87ba792fa249ae82f0736 [master] Merge branch 'trac2372'

Tue Nov 13 21:13:49 UTC 2012

The branch, master has been updated
       via  9a11ef62ae36d9d891e87ba792fa249ae82f0736 (commit)
       via  30b659a84d2d1beedc9adc53fbfd2ff71587dea5 (commit)
       via  15d6d71ed052c0dc5d6be39a8afe484014887b31 (commit)
       via  a3dde49a8210f65d1f37c9dee5638dc62ed92dd3 (commit)
       via  6a2d1a5cbdeb2603982abbf148012e0879bba4ee (commit)
       via  9fb295c549978ba217384643beb60b25b52ff35a (commit)
       via  a49d071d0040474aa3fb23cb0e79c92f51f54f71 (commit)
       via  626a7de9ad0d9cc7324e024b41f55ef636ee7957 (commit)
       via  3446f7e74be156440c4e6b0333955b4c84e5443f (commit)
       via  573c996957cb374223a921c0ec27a5218315b865 (commit)
       via  3be2894bafa1ac7fdc9cbdfd81b9eb0b1aba16d6 (commit)
       via  3ce2efa154f8eff8fd7029925ffd29116e2be8fa (commit)
       via  36c280eef8a417854f7bfd2c7ad1356a8d52dcee (commit)
       via  6e50ccbf961a340686ed9a6cc4011a7d9a867ccb (commit)
       via  0c5e6acbe260c8095c3981526d3678e48f2faca9 (commit)
       via  54ac6712c946aad84cd21330a08c3064e28ab91d (commit)
       via  60977cf7528acb2bcc00067cb6a88a8136445453 (commit)
       via  cfc02b5aba598d12a89d51908beade5f3aaaf40f (commit)
       via  5fd8dced290ab771f853ba6ae3127f16a2ac0689 (commit)
       via  c1799647fcc9e61a46b254da71ab0dcb24660885 (commit)
       via  9fcdf4d524b54dc80a2b68eb800861f1727330c8 (commit)
       via  182de87a7fd651f2b70e603b470ec71b8b3d48f0 (commit)
       via  d4902d224ce5d0ff1f7f289fd6ba32b0152dc5a3 (commit)
       via  a5767f8d4eaa3e94b157fc00211a0d0fc3a4a989 (commit)
       via  65fee8d161688cc74df9f14b641b64bc5117cd1f (commit)
       via  5044adfc3df675aad9dc287af4a3d799d40dfd80 (commit)
       via  b7939a2d63141a7920491f5f4c13c62eb13cf88c (commit)
       via  65fffc8b2838d3d08d9aba54ad9b41099c9e32f7 (commit)
       via  8baae428236f887b53988cf0c7b52c3275acd531 (commit)
       via  d6ed107c38459ad3c3f0b3f74475dda4707d7f23 (commit)
       via  150c7dbba7294cf89dae06cafb3e07f407aa2062 (commit)
       via  2645b4f705c2793e4a40bc4604a953da69ec2506 (commit)
      from  06069bf9520dc24133605c0ef19ffdeb3414d669 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 9a11ef62ae36d9d891e87ba792fa249ae82f0736
Merge: 06069bf 30b659a
Author: JINMEI Tatuya <jinmei at isc.org>
Date:   Tue Nov 13 13:12:20 2012 -0800

    [master] Merge branch 'trac2372'

-----------------------------------------------------------------------

Summary of changes:
 src/lib/dns/Makefile.am                          |    1 +
 src/lib/dns/master_lexer.cc                      |  170 +++++++++++++-
 src/lib/dns/master_lexer.h                       |   29 ++-
 src/lib/dns/master_lexer_state.h                 |  138 ++++++++++++
 src/lib/dns/tests/Makefile.am                    |    1 +
 src/lib/dns/tests/master_lexer_state_unittest.cc |  256 ++++++++++++++++++++++
 6 files changed, 592 insertions(+), 3 deletions(-)
 create mode 100644 src/lib/dns/master_lexer_state.h
 create mode 100644 src/lib/dns/tests/master_lexer_state_unittest.cc

-----------------------------------------------------------------------

diff --git a/src/lib/dns/Makefile.am b/src/lib/dns/Makefile.am
index e81ef76..14b74f7 100644
--- a/src/lib/dns/Makefile.am
+++ b/src/lib/dns/Makefile.am
@@ -97,6 +97,7 @@ libb10_dns___la_SOURCES += master_lexer_inputsource.h master_lexer_inputsource.c
 libb10_dns___la_SOURCES += labelsequence.h labelsequence.cc
 libb10_dns___la_SOURCES += masterload.h masterload.cc
 libb10_dns___la_SOURCES += master_lexer.h master_lexer.cc
+libb10_dns___la_SOURCES += master_lexer_state.h
 libb10_dns___la_SOURCES += message.h message.cc
 libb10_dns___la_SOURCES += messagerenderer.h messagerenderer.cc
 libb10_dns___la_SOURCES += name.h name.cc
diff --git a/src/lib/dns/master_lexer.cc b/src/lib/dns/master_lexer.cc
index c9c5528..992a051 100644
--- a/src/lib/dns/master_lexer.cc
+++ b/src/lib/dns/master_lexer.cc
@@ -16,6 +16,7 @@
 
 #include <dns/master_lexer.h>
 #include <dns/master_lexer_inputsource.h>
+#include <dns/master_lexer_state.h>
 
 #include <boost/shared_ptr.hpp>
 
@@ -32,10 +33,34 @@ typedef boost::shared_ptr<master_lexer_internal::InputSource> InputSourcePtr;
 using namespace master_lexer_internal;
 
 struct MasterLexer::MasterLexerImpl {
-    MasterLexerImpl() : token_(Token::NOT_STARTED) {}
+    MasterLexerImpl() : source_(NULL), token_(Token::NOT_STARTED),
+                        paren_count_(0), last_was_eol_(false)
+    {}
+
+    // A helper method to skip a possible comment up to the next EOL or EOF,
+    // commonly used by state classes.  It returns the corresponding "end-of"
+    // character in case it's a comment; otherwise it simply returns the
+    // current character.
+    int skipComment(int c) {
+        if (c == ';') {
+            while (true) {
+                c = source_->getChar();
+                if (c == '\n' || c == InputSource::END_OF_STREAM) {
+                    return (c);
+                }
+            }
+        }
+        return (c);
+    }
 
     std::vector<InputSourcePtr> sources_;
-    Token token_;
+    InputSource* source_;       // current source (NULL if sources_ is empty)
+    Token token_;               // currently recognized token (set by a state)
+
+    // These are used in states, and defined here only as a placeholder.
+    // The main lexer class does not need these members.
+    size_t paren_count_;        // nest count of the parentheses
+    bool last_was_eol_; // whether the lexer just passed an end-of-line
 };
 
 MasterLexer::MasterLexer() : impl_(new MasterLexerImpl) {
@@ -60,12 +85,14 @@ MasterLexer::pushSource(const char* filename, std::string* error) {
         return (false);
     }
 
+    impl_->source_ = impl_->sources_.back().get();
     return (true);
 }
 
 void
 MasterLexer::pushSource(std::istream& input) {
     impl_->sources_.push_back(InputSourcePtr(new InputSource(input)));
+    impl_->source_ = impl_->sources_.back().get();
 }
 
 void
@@ -75,6 +102,8 @@ MasterLexer::popSource() {
                   "MasterLexer::popSource on an empty source");
     }
     impl_->sources_.pop_back();
+    impl_->source_ = impl_->sources_.empty() ? NULL :
+        impl_->sources_.back().get();
 }
 
 std::string
@@ -115,5 +144,142 @@ MasterLexer::Token::getErrorText() const {
     return (error_text[val_.error_code_]);
 }
 
+namespace master_lexer_internal {
+// Below we implement state classes for state transitions of MasterLexer.
+// Note that these need to be defined here so that they can refer to
+// the details of MasterLexerImpl.
+
+typedef MasterLexer::Token Token; // convenience shortcut
+
+bool
+State::wasLastEOL(const MasterLexer& lexer) const {
+    return (lexer.impl_->last_was_eol_);
+}
+
+const MasterLexer::Token&
+State::getToken(const MasterLexer& lexer) const {
+    return (lexer.impl_->token_);
+}
+
+size_t
+State::getParenCount(const MasterLexer& lexer) const {
+    return (lexer.impl_->paren_count_);
+}
+
+namespace {
+class CRLF : public State {
+public:
+    CRLF() {}
+    virtual const State* handle(MasterLexer& lexer) const {
+        // We've just seen '\r'.  If this is part of a sequence of '\r\n',
+        // we combine them as a single END-OF-LINE.  Otherwise we treat the
+        // single '\r' as an EOL and continue tokenization from the character
+        // immediately after '\r'.  One tricky case is when there's a comment
+        // between '\r' and '\n'.  This implementation combines these
+        // characters and treats them as a single EOL (the behavior derived
+        // from BIND 9).  Technically this may not be correct, but in practice
+        // the caller wouldn't distinguish this case from the case where it
+        // has two EOLs, so we simplify the process.
+        const int c = getLexerImpl(lexer)->skipComment(
+            getLexerImpl(lexer)->source_->getChar());
+        if (c != '\n') {
+            getLexerImpl(lexer)->source_->ungetChar();
+        }
+        getLexerImpl(lexer)->token_ = Token(Token::END_OF_LINE);
+        getLexerImpl(lexer)->last_was_eol_ = true;
+        return (NULL);
+    }
+};
+
+// Currently this is provided mostly as a placeholder.
+class String : public State {
+public:
+    String() {}
+    virtual const State* handle(MasterLexer& /*lexer*/) const {
+        return (NULL);
+    }
+};
+
+// We use a common instance of each state in a singleton-like way to save
+// construction overhead.  They are not singletons in the strict sense as
+// we don't prohibit direct construction of these objects.  But that doesn't
+// matter much anyway, because the definitions are completely hidden within
+// this file.
+const CRLF CRLF_STATE;
+const String STRING_STATE;
+}
+
+const State&
+State::getInstance(ID state_id) {
+    switch (state_id) {
+    case CRLF:
+        return (CRLF_STATE);
+    case String:
+        return (STRING_STATE);
+    }
+
+    // This is a bug of the caller, and this method is only expected to be
+    // used by tests, so we just forcefully make it fail by asserting the
+    // condition.
+    assert(false);
+    return (STRING_STATE); // a dummy return, to silence some compilers.
+}
+
+const State*
+State::start(MasterLexer& lexer, MasterLexer::Options options) {
+    // define some shortcuts
+    MasterLexer::MasterLexerImpl& lexerimpl = *lexer.impl_;
+    size_t& paren_count = lexerimpl.paren_count_;
+
+    while (true) {
+        const int c = lexerimpl.skipComment(lexerimpl.source_->getChar());
+        if (c == InputSource::END_OF_STREAM) {
+            lexerimpl.last_was_eol_ = false;
+            if (paren_count != 0) {
+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
+                paren_count = 0; // reset to 0; this helps in lenient mode.
+                return (NULL);
+            }
+            lexerimpl.token_ = Token(Token::END_OF_FILE);
+            return (NULL);
+        } else if (c == ' ' || c == '\t') {
+            // If requested and we are not in (), recognize the initial space.
+            if (lexerimpl.last_was_eol_ && paren_count == 0 &&
+                (options & MasterLexer::INITIAL_WS) != 0) {
+                lexerimpl.last_was_eol_ = false;
+                lexerimpl.token_ = Token(Token::INITIAL_WS);
+                return (NULL);
+            }
+        } else if (c == '\n') {
+            lexerimpl.last_was_eol_ = true;
+            if (paren_count == 0) { // we don't recognize EOL if we are in ()
+                lexerimpl.token_ = Token(Token::END_OF_LINE);
+                return (NULL);
+            }
+        } else if (c == '\r') {
+            if (paren_count == 0) { // check if we are in () (see above)
+                return (&CRLF_STATE);
+            }
+        } else if (c == '(') {
+            lexerimpl.last_was_eol_ = false;
+            ++paren_count;
+        } else if (c == ')') {
+            lexerimpl.last_was_eol_ = false;
+            if (paren_count == 0) {
+                lexerimpl.token_ = Token(Token::UNBALANCED_PAREN);
+                return (NULL);
+            }
+            --paren_count;
+        } else {
+            // Note: in #2373 we should probably ungetChar().
+            lexerimpl.last_was_eol_ = false;
+            return (&STRING_STATE);
+        }
+        // no code should be here; we just continue the loop.
+    }
+}
+
+} // namespace master_lexer_internal
+
 } // end of namespace dns
 } // end of namespace isc
diff --git a/src/lib/dns/master_lexer.h b/src/lib/dns/master_lexer.h
index da6bb5d..854d602 100644
--- a/src/lib/dns/master_lexer.h
+++ b/src/lib/dns/master_lexer.h
@@ -24,6 +24,9 @@
 
 namespace isc {
 namespace dns {
+namespace master_lexer_internal {
+class State;
+}
 
 /// \brief Tokenizer for parsing DNS master files.
 ///
@@ -64,9 +67,22 @@ namespace dns {
 /// this class does not throw for an error that would be reported as an
 /// exception in other classes.
 class MasterLexer {
+    friend class master_lexer_internal::State;
 public:
     class Token;       // we define it separately for better readability
 
+    /// \brief Options for getNextToken.
+    ///
+    /// A compound option, indicating multiple options are set, can be
+    /// specified using the bitwise OR operator (operator|()).
+    enum Options {
+        NONE = 0,               ///< No option
+        INITIAL_WS = 1, ///< recognize begin-of-line spaces after an
+                        ///< end-of-line
+        QSTRING = 2,    ///< recognize quoted string
+        NUMBER = 4   ///< recognize numeric text as integer
+    };
+
     /// \brief The constructor.
     ///
     /// \throw std::bad_alloc Internal resource allocation fails (rare case).
@@ -167,6 +183,16 @@ private:
     MasterLexerImpl* impl_;
 };
 
+/// \brief Operator to combine \c MasterLexer options
+///
+/// This is a trivial shortcut so that compound options can be specified
+/// in an intuitive way.
+inline MasterLexer::Options
+operator|(MasterLexer::Options o1, MasterLexer::Options o2) {
+    return (static_cast<MasterLexer::Options>(
+                static_cast<unsigned>(o1) | static_cast<unsigned>(o2)));
+}
+
 /// \brief Tokens for \c MasterLexer
 ///
 /// This is a simple value-class encapsulating a type of a lexer token and
@@ -192,7 +218,8 @@ public:
     enum Type {
         END_OF_LINE, ///< End of line detected (if asked for detecting it)
         END_OF_FILE, ///< End of file detected (if asked for detecting it)
-        INITIAL_WS,  ///< White spaces at the beginning of a line
+        INITIAL_WS,  ///< White spaces at the beginning of a line after an
+                     ///< end of line
         NOVALUE_TYPE_MAX = INITIAL_WS, ///< Max integer corresponding to
                                        /// no-value (type only) types.
                                        /// Mainly for internal use.
diff --git a/src/lib/dns/master_lexer_state.h b/src/lib/dns/master_lexer_state.h
new file mode 100644
index 0000000..86957c5
--- /dev/null
+++ b/src/lib/dns/master_lexer_state.h
@@ -0,0 +1,138 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#ifndef MASTER_LEXER_STATE_H
+#define MASTER_LEXER_STATE_H 1
+
+#include <dns/master_lexer.h>
+
+namespace isc {
+namespace dns {
+
+namespace master_lexer_internal {
+
+/// \brief Tokenization state for \c MasterLexer.
+///
+/// This is a base class of classes that represent various states of a single
+/// tokenization session of \c MasterLexer, i.e., the states used for a
+/// single call to \c MasterLexer::getNextToken().
+///
+/// It follows the convention of the state design pattern: each derived class
+/// corresponds to a specific state, and the state transition takes place
+/// through the virtual method named \c handle().  The \c handle() method
+/// takes the main \c MasterLexer object that holds all necessary internal
+/// context, and updates it as necessary; each \c State derived class is
+/// completely stateless.
+///
+/// The initial transition takes place in a static method of the base class,
+/// \c start().  This is mainly for implementation convenience; we need to
+/// pass options given to \c MasterLexer::getNextToken() for the initial
+/// state, so it makes more sense to separate the interface for the transition
+/// from the initial state.
+///
+/// When an object of a specific state class completes the session, it
+/// normally sets the identified token in the lexer, and returns NULL;
+/// if more transition is necessary, it returns a pointer to the next state
+/// object.
+///
+/// As is usual in the state design pattern, the \c State class is made
+/// a friend class of \c MasterLexer and can refer to its internal details.
+/// This is intentional; essentially it's a part of \c MasterLexer and
+/// is defined as a separate class only for implementation clarity and better
+/// testability.  It's defined in a publicly visible header, but that's only
+/// for testing purposes.  Neither normal applications nor other classes of
+/// this library are expected to use this class.
+class State {
+public:
+    /// \brief Begin state transitions to get the next token.
+    ///
+    /// This is the first method that \c MasterLexer needs to call for a
+    /// tokenization session.  The lexer passes a reference to itself
+    /// and options given in \c getNextToken().
+    ///
+    /// \throw InputSource::ReadError Unexpected I/O error
+    /// \throw std::bad_alloc Internal resource allocation failure
+    ///
+    /// \param lexer The lexer object that holds the main context.
+    /// \param options The options passed to getNextToken().
+    /// \return A pointer to the next state object or NULL if the transition
+    /// is completed.
+    static const State* start(MasterLexer& lexer,
+                              MasterLexer::Options options);
+
+    /// \brief Handle the process of one specific state.
+    ///
+    /// This method is expected to be called on the object returned by
+    /// start(), and then on each subsequently returned object until NULL is
+    /// returned.  The call chain will form the complete state transition.
+    ///
+    /// \throw InputSource::ReadError Unexpected I/O error
+    /// \throw std::bad_alloc Internal resource allocation failure
+    ///
+    /// \param lexer The lexer object that holds the main context.
+    /// \return A pointer to the next state object or NULL if the transition
+    /// is completed.
+    virtual const State* handle(MasterLexer& lexer) const = 0;
+
+    /// \brief Types of states.
+    ///
+    /// Specific states are basically hidden within the implementation,
+    /// but we'd like to allow tests to examine them, so we provide
+    /// a way to get an instance of a specific state.
+    enum ID {
+        CRLF,                  ///< Just seen a carriage-return character
+        String                 ///< Handling a string token
+    };
+
+    /// \brief Returns a \c State instance of the given state.
+    ///
+    /// This is provided only for testing purposes so tests can check
+    /// the behavior of each state separately.  \c MasterLexer shouldn't
+    /// need this method.
+    static const State& getInstance(ID state_id);
+
+    /// \name Read-only accessors for testing purposes.
+    ///
+    /// These allow tests to inspect some selected portion of the internal
+    /// states of \c MasterLexer.  These shouldn't be used except for testing
+    /// purposes.
+    ///@{
+    bool wasLastEOL(const MasterLexer& lexer) const;
+    const MasterLexer::Token& getToken(const MasterLexer& lexer) const;
+    size_t getParenCount(const MasterLexer& lexer) const;
+    ///@}
+
+protected:
+    /// \brief An accessor to the internal implementation class of
+    /// \c MasterLexer.
+    ///
+    /// This is provided for specific derived classes as they are not direct
+    /// friends of \c MasterLexer.
+    ///
+    /// \param lexer The lexer object that holds the main context.
+    /// \return A pointer to the implementation class object of the given
+    /// lexer.  This is never NULL.
+    MasterLexer::MasterLexerImpl* getLexerImpl(MasterLexer& lexer) const {
+        return (lexer.impl_);
+    }
+};
+
+} // namespace master_lexer_internal
+} // namespace dns
+} // namespace isc
+#endif  // MASTER_LEXER_STATE_H
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/lib/dns/tests/Makefile.am b/src/lib/dns/tests/Makefile.am
index d5adc21..33867da 100644
--- a/src/lib/dns/tests/Makefile.am
+++ b/src/lib/dns/tests/Makefile.am
@@ -27,6 +27,7 @@ run_unittests_SOURCES += labelsequence_unittest.cc
 run_unittests_SOURCES += messagerenderer_unittest.cc
 run_unittests_SOURCES += master_lexer_token_unittest.cc
 run_unittests_SOURCES += master_lexer_unittest.cc
+run_unittests_SOURCES += master_lexer_state_unittest.cc
 run_unittests_SOURCES += name_unittest.cc
 run_unittests_SOURCES += nsec3hash_unittest.cc
 run_unittests_SOURCES += rrclass_unittest.cc rrtype_unittest.cc
diff --git a/src/lib/dns/tests/master_lexer_state_unittest.cc b/src/lib/dns/tests/master_lexer_state_unittest.cc
new file mode 100644
index 0000000..bcee7fd
--- /dev/null
+++ b/src/lib/dns/tests/master_lexer_state_unittest.cc
@@ -0,0 +1,256 @@
+// Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+#include <dns/master_lexer.h>
+#include <dns/master_lexer_inputsource.h>
+#include <dns/master_lexer_state.h>
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+
+using namespace isc::dns;
+using namespace master_lexer_internal;
+
+namespace {
+typedef MasterLexer::Token Token; // shortcut
+
+class MasterLexerStateTest : public ::testing::Test {
+protected:
+    MasterLexerStateTest() : common_options(MasterLexer::INITIAL_WS),
+                             s_null(NULL),
+                             s_crlf(State::getInstance(State::CRLF)),
+                             s_string(State::getInstance(State::String)),
+                             options(MasterLexer::NONE),
+                             orig_options(options)
+    {}
+
+    // Specify INITIAL_WS as common initial options.
+    const MasterLexer::Options common_options;
+    MasterLexer lexer;
+    const State* const s_null;
+    const State& s_crlf;
+    const State& s_string;
+    std::stringstream ss;
+    MasterLexer::Options options, orig_options;
+};
+
+// Common check for the end-of-file condition.
+// Token is END_OF_FILE, and the lexer is NOT in the 'last EOL' state.
+// Passed state can be any valid one; they are stateless, just providing the
+// interface for inspection.
+void
+eofCheck(const State& state, MasterLexer& lexer) {
+    EXPECT_EQ(Token::END_OF_FILE, state.getToken(lexer).getType());
+    EXPECT_FALSE(state.wasLastEOL(lexer));
+}
+
+TEST_F(MasterLexerStateTest, startAndEnd) {
+    // A simple case: the input is empty, so we begin with start and
+    // are immediately done.
+    lexer.pushSource(ss);
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    eofCheck(s_crlf, lexer);
+}
+
+TEST_F(MasterLexerStateTest, startToEOL) {
+    ss << "\n";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+    // The next lexer session will reach EOF.  Same eof check should pass.
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    eofCheck(s_crlf, lexer);
+}
+
+TEST_F(MasterLexerStateTest, space) {
+    // repeat '\t\n' twice (see below), then space after EOL
+    ss << " \t\n\t\n ";
+    lexer.pushSource(ss);
+
+    // by default space characters and tabs will be ignored.  We check this
+    // twice; at the second iteration, it's a white space at the beginning
+    // of line, but since we don't specify INITIAL_WS option, it's treated as
+    // normal space and ignored.
+    for (size_t i = 0; i < 2; ++i) {
+        EXPECT_EQ(s_null, State::start(lexer, MasterLexer::NONE));
+        EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+        EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    }
+
+    // Now we specify the INITIAL_WS option.  It will be recognized and the
+    // corresponding token will be returned.
+    EXPECT_EQ(s_null, State::start(lexer, MasterLexer::INITIAL_WS));
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, parentheses) {
+    ss << "\n(\na\n )\n "; // 1st \n is to check if 'was EOL' is set to false
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // handle \n
+
+    // Now handle '('.  It skips \n and recognizes 'a' as a string
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // check pre condition
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer)); // check post condition
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+
+    // skip 'a' (note: until #2373 it's actually skipped as part of the '('
+    // handling)
+    s_string.handle(lexer);
+
+    // Then handle ')'.  '\n' before ')' isn't recognized because
+    // it's canceled due to the '('.  Likewise, the space after the '\n'
+    // shouldn't be recognized but should be just ignored.
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+
+    // Now, temporarily disabled options are restored: Both EOL and the
+    // initial WS are recognized
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, nestedParentheses) {
+    // This is an unusual, but allowed (in this implementation) case.
+    ss << "(a(b)\n c)\n ";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                      // consume 'a'
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                     // consume 'b'
+    EXPECT_EQ(2, s_crlf.getParenCount(lexer)); // now the count is 2
+
+    // Close the innermost parentheses.  count will be decreased, but options
+    // shouldn't be restored yet, so the intermediate EOL or initial WS won't
+    // be recognized.
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume ')'
+    s_string.handle(lexer);                      // consume 'c'
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
+
+    // Close the outermost parentheses.  count will be reset to 0, and original
+    // options are restored.
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+
+    // Now, temporarily disabled options are restored: Both EOL and the
+    // initial WS are recognized
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, unbalancedParentheses) {
+    // Only closing paren is provided.  We prepend a \n to check if it's
+    // correctly canceled after detecting the error.
+    ss << "\n)";
+    ss << "(a";
+    lexer.pushSource(ss);
+
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // consume '\n'
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer)); // this \n was remembered
+
+    // Now checking ')'.  The result should be error, count shouldn't be
+    // changed.  "last EOL" should be canceled.
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer));
+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+    EXPECT_FALSE(s_crlf.wasLastEOL(lexer));
+
+    // Reach EOF with a dangling open parenthesis.
+    EXPECT_EQ(&s_string, State::start(lexer, common_options)); // consume '('
+    s_string.handle(lexer);                      // consume 'a'
+    EXPECT_EQ(1, s_crlf.getParenCount(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));    // reach EOF
+    ASSERT_EQ(Token::ERROR, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(Token::UNBALANCED_PAREN, s_crlf.getToken(lexer).getErrorCode());
+    EXPECT_EQ(0, s_crlf.getParenCount(lexer)); // should be reset to 0
+}
+
+TEST_F(MasterLexerStateTest, startToComment) {
+    // Begin with 'start', skip space, then encounter a comment.  Skip
+    // the rest of the line, and recognize the new line.  Note that the
+    // second ';' is simply ignored.
+    ss << "  ;a;\n";
+    ss << ";a;";           // Likewise, but the comment ends with EOF.
+    lexer.pushSource(ss);
+
+    // Comment ending with EOL
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+
+    // Comment ending with EOF
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, commentAfterParen) {
+    // comment after an opening parenthesis.  The code that is tested by
+    // other tests should also ensure that it works correctly, but we
+    // check it explicitly.
+    ss << "( ;this is a comment\na)\n";
+    lexer.pushSource(ss);
+
+    // consume '(', skip comments, consume 'a', then consume ')'
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+    s_string.handle(lexer);
+    EXPECT_EQ(s_null, State::start(lexer, common_options));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+}
+
+TEST_F(MasterLexerStateTest, crlf) {
+    ss << "\r\n";               // case 1
+    ss << "\r ";                // case 2
+    ss << "\r;comment\na";      // case 3
+    ss << "\r";                 // case 4
+    lexer.pushSource(ss);
+
+    // 1. A sequence of \r, \n is recognized as a single 'end-of-line'
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));   // recognize '\n'
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_TRUE(s_crlf.wasLastEOL(lexer));
+
+    // 2. Single '\r' (not followed by \n) is recognized as a single
+    // 'end-of-line'.  then there will be "initial WS"
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    // see ' ', "unget" it
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options)); // recognize ' '
+    EXPECT_EQ(Token::INITIAL_WS, s_crlf.getToken(lexer).getType());
+
+    // 3. comment between \r and \n
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    // skip comments, recognize '\n'
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(Token::END_OF_LINE, s_crlf.getToken(lexer).getType());
+    EXPECT_EQ(&s_string, State::start(lexer, common_options));
+
+    // 4. \r then EOF
+    EXPECT_EQ(&s_crlf, State::start(lexer, common_options)); // recognize '\r'
+    // see EOF, then "unget" it
+    EXPECT_EQ(s_null, s_crlf.handle(lexer));
+    EXPECT_EQ(s_null, State::start(lexer, common_options));  // recognize EOF
+    EXPECT_EQ(Token::END_OF_FILE, s_crlf.getToken(lexer).getType());
+}
+
+}