BIND 10 trac3360, updated. 6199f3a5ea37a65a2405e571a5b189dc127af138 [3360] Added CSV file validation.

BIND 10 source code commits bind10-changes at lists.isc.org
Fri Mar 14 14:38:23 UTC 2014


The branch, trac3360 has been updated
       via  6199f3a5ea37a65a2405e571a5b189dc127af138 (commit)
      from  c3f63b985fde7d458d0624960ff3ecb17269709b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 6199f3a5ea37a65a2405e571a5b189dc127af138
Author: Marcin Siodelski <marcin at isc.org>
Date:   Fri Mar 14 15:37:57 2014 +0100

    [3360] Added CSV file validation.

-----------------------------------------------------------------------

Summary of changes:
 src/lib/util/csv_file.cc                |  144 +++++++++++++++++++++++++-----
 src/lib/util/csv_file.h                 |   77 +++++++++++++++-
 src/lib/util/tests/csv_file_unittest.cc |  147 ++++++++++++++++++++++++++++---
 3 files changed, 334 insertions(+), 34 deletions(-)

-----------------------------------------------------------------------
diff --git a/src/lib/util/csv_file.cc b/src/lib/util/csv_file.cc
index 10ce9e5..6fc44ca 100644
--- a/src/lib/util/csv_file.cc
+++ b/src/lib/util/csv_file.cc
@@ -124,7 +124,8 @@ CSVRow::checkIndex(const size_t at) const {
 }
 
 CSVFile::CSVFile(const std::string& filename)
-    : primary_separator_(','), filename_(filename), fs_() {
+    : primary_separator_(','), filename_(filename), fs_(), cols_(0),
+      read_msg_() {
 }
 
 CSVFile::~CSVFile() {
@@ -142,6 +143,12 @@ CSVFile::close() {
 }
 
 void
+CSVFile::flush() const {
+    checkStreamStatus("flush");
+    fs_->flush();
+}
+
+void
 CSVFile::addColumn(const std::string& col_name) {
     if (getColumnIndex(col_name) >= 0) {
         isc_throw(CSVFileError, "attempt to add duplicate column '"
@@ -152,10 +159,10 @@ CSVFile::addColumn(const std::string& col_name) {
 
 void
 CSVFile::append(const CSVRow& row) const {
-    if (!fs_) {
-        isc_throw(CSVFileError, "unable to write a row to the CSV file '"
-                  << filename_ << "', which is closed");
-    }
+    checkStreamStatus("append");
+
+    // If a stream is in invalid state, reset the state.
+    fs_->clear();
 
     if (row.getValuesCount() != getColumnCount()) {
         isc_throw(CSVFileError, "number of values in the CSV row '"
@@ -176,6 +183,19 @@ CSVFile::append(const CSVRow& row) const {
     }
 }
 
+void
+CSVFile::checkStreamStatus(const std::string& operation) const {
+    if (!fs_) {
+        isc_throw(CSVFileError, "NULL stream pointer when performing '"
+                  << operation << "' on file '" << filename_ << "'");
+
+    } else if (!fs_->is_open()) {
+        isc_throw(CSVFileError, "closed stream when performing '"
+                  << operation << "' on file '" << filename_ << "'");
+
+    }
+}
+
 std::ifstream::pos_type
 CSVFile::size() const {
     std::ifstream fs(filename_.c_str());
@@ -186,11 +206,16 @@ CSVFile::size() const {
         fs.close();
         return (0);
     }
-    // Seek to the end of file and see where we are. This is a size of
-    // the file.
-    fs.seekg(0, std::ifstream::end);
-    std::ifstream::pos_type pos = fs.tellg();
-    fs.close();
+    std::ifstream::pos_type pos;
+    try {
+        // Seek to the end of file and see where we are. This is a size of
+        // the file.
+        fs.seekg(0, std::ifstream::end);
+        pos = fs.tellg();
+        fs.close();
+    } catch (const std::exception& ex) {
+        return (0);
+    }
     return (pos);
 }
 
@@ -214,8 +239,25 @@ CSVFile::getColumnName(const size_t col_index) const {
     return (cols_[col_index]);
 }
 
-void
-CSVFile::next(CSVRow& row) {
+bool
+CSVFile::next(CSVRow& row, const bool skip_validation) {
+    // Set somethings as row validation error. Although, we haven't started
+    // actual row validation we should get rid of any previously recorded
+    // errors so as the caller doesn't interpret them as the current one.
+    setReadMsg("validation not started");
+
+    try {
+        // Check that stream is "ready" for any IO operations.
+        checkStreamStatus("get next row");
+
+    } catch (isc::Exception& ex) {
+        setReadMsg(ex.what());
+        return (false);
+    }
+
+    // If a stream is in invalid state, reset the state.
+    fs_->clear();
+
     // Get exactly one line of the file.
     std::string line;
     std::getline(*fs_, line);
@@ -223,10 +265,20 @@ CSVFile::next(CSVRow& row) {
     // return an empty row.
     if (line.empty() && fs_->eof()) {
         row = EMPTY_ROW();
-        return;
+        return (true);
+
+    } else if (!fs_->good()) {
+        // If we hit an IO error, communicate it to the caller but do NOT close
+        // the stream. Caller may try again.
+        setReadMsg("error reading a row from CSV file '"
+                   + std::string(filename_) + "'");
+        return (false);
     }
     // If we read anything, parse it.
     row.parse(line.c_str());
+
+    // And check if it is correct.
+    return (skip_validation ? true : validate(row));
 }
 
 void
@@ -248,14 +300,34 @@ CSVFile::open() {
         // Make sure we are on the beginning of the file, so as we can parse
         // the header.
         fs_->seekg(0);
+        if (!fs_->good()) {
+            close();
+            isc_throw(CSVFileError, "unable to set read pointer in the file '"
+                      << filename_ << "'");
+        }
 
-        // Get the header.
-        std::string line;
-        std::getline(*fs_, line);
-        CSVRow header(line.c_str(), primary_separator_);
-        // Initialize columns.
-        for (size_t i = 0; i < header.getValuesCount(); ++i) {
-            addColumn(header.readAt(i));
+        // Read the header.
+        CSVRow header;
+        if (!next(header, true)) {
+            close();
+            isc_throw(CSVFileError, "failed to read and parse header of the"
+                      " CSV file '" << filename_ << "': "
+                      << getReadMsg());
+        }
+
+        // Check the header against the columns specified for the CSV file.
+        if (!validateHeader(header)) {
+            close();
+            isc_throw(CSVFileError, "invalid header '" << header
+                      << "' in CSV file '" << filename_ << "'");
+        }
+
+        // Everything is good, so if we haven't added any columns yet,
+        // add them.
+        if (getColumnCount() == 0) {
+            for (size_t i = 0; i < header.getValuesCount(); ++i) {
+                addColumn(header.readAt(i));
+            }
         }
     }
 }
@@ -291,5 +363,37 @@ CSVFile::recreate() {
 
 }
 
+bool
+CSVFile::validate(const CSVRow& row) {
+    setReadMsg("success");
+    bool ok = (row.getValuesCount() == getColumnCount());
+    if (!ok) {
+        std::ostringstream s;
+        s << "the size of the row '" << row << "' doesn't match the number of"
+            " columns '" << getColumnCount() << "' of the CSV file '"
+          << filename_ << "'";
+        setReadMsg(s.str());
+    }
+    return (ok);
 }
+
+bool
+CSVFile::validateHeader(const CSVRow& header) {
+    if (getColumnCount() == 0) {
+        return (true);
+    }
+
+    if (getColumnCount() != header.getValuesCount()) {
+        return (false);
+    }
+
+    for (int i = 0; i < getColumnCount(); ++i) {
+        if (getColumnName(i) != header.readAt(i)) {
+            return (false);
+        }
+    }
+    return (true);
 }
+
+} // end of isc::util namespace
+} // end of isc namespace
diff --git a/src/lib/util/csv_file.h b/src/lib/util/csv_file.h
index 52267f8..bf2bdb2 100644
--- a/src/lib/util/csv_file.h
+++ b/src/lib/util/csv_file.h
@@ -73,7 +73,7 @@ public:
     /// @param cols Number of values in the row.
     /// @param separator Character being a separator between values in the
     /// text representation of the row.
-    CSVRow(const size_t cols, const char separator = ',');
+    CSVRow(const size_t cols = 0, const char separator = ',');
 
     /// @brief Constructor, parses a single row of the CSV file.
     ///
@@ -270,11 +270,22 @@ public:
     /// @brief Closes the CSV file.
     void close();
 
+    /// @brief Flushes a file.
+    void flush() const;
+
     /// @brief Returns the number of columns in the file.
     size_t getColumnCount() const {
         return (cols_.size());
     }
 
+    /// @brief Returns the description of the last error returned by the
+    /// @c CSVFile::next function.
+    ///
+    /// @return Description of the last error during row validation.
+    std::string getReadMsg() const {
+        return (read_msg_);
+    }
+
     /// @brief Returns the index of the column having specified name.
     ///
     /// This function is exception safe.
@@ -299,7 +310,11 @@ public:
     /// reached, the empty row is returned (a row containing no values).
     ///
     /// @param [out] row Object receiving the parsed CSV file.
-    void next(CSVRow& row);
+    /// @param skip_validation Do not perform validation.
+    ///
+    /// @return true if row has been read and validated; false if validation
+    /// failed.
+    bool next(CSVRow& row, const bool skip_validation = false);
 
     /// @brief Opens existing file or creates a new one.
     ///
@@ -322,14 +337,69 @@ public:
     /// should be called.
     void recreate();
 
+    /// @brief Sets error message after row validation.
+    ///
+    /// The @c CSVFile::validate function is responsible for setting the
+    /// error message after validation of the row read from the CSV file.
+    /// It will use this function to set this message. Note, that the
+    /// @c validate function can set a message after successful validation
+    /// too. Such message could say "success", or something similar.
+    ///
+    /// @param val_msg Error message to be set.
+    void setReadMsg(const std::string& read_msg) {
+        read_msg_ = read_msg;
+    }
+
     /// @brief Represents empty row.
     static CSVRow EMPTY_ROW() {
         static CSVRow row(0);
         return (row);
     }
 
+protected:
+
+    /// @brief Validate the row read from a file.
+    ///
+    /// This function implements a basic validation for the row read from the
+    /// CSV file. It is virtual so as it may be customized in derived classes.
+    ///
+    /// This default implementation checks that the number of values in the
+    /// row corresponds to the number of columns specified for this file.
+    ///
+    /// If row validation fails, the error message is noted and can be retrieved
+    /// using @c CSVFile::getReadMsg. The function which overrides this
+    /// base implementation is responsible for setting the error message using
+    /// @c CSVFile::setReadMsg.
+    ///
+    /// @param row A row to be validated.
+    ///
+    /// @return true if the column is valid; false otherwise.
+    virtual bool validate(const CSVRow& row);
+
 private:
 
+    /// @brief This function validates the header of the CSV file.
+    ///
+    /// If there are any columns added to the @c CSVFile object, it will
+    /// compare that they exactly match (including order) the header read
+    /// from the file.
+    ///
+    /// This function is called internally by @CSVFile::open.
+    ///
+    /// @param header A row holding a header.
+    /// @return true if header matches the columns; false otherwise.
+    bool validateHeader(const CSVRow& header);
+
+    /// @brief Sanity check if stream is open.
+    ///
+    /// Checks if the file stream is open so as IO operations can be performed
+    /// on it. This is internally called by the public class members to prevent
+    /// them from performing IO operations on invalid stream and using NULL
+    /// pointer to a stream.
+    ///
+    /// @throw CSVFileError if stream is closed or pointer to it is NULL.
+    void checkStreamStatus(const std::string& operation) const;
+
     /// @brief Returns size of the CSV file.
     std::ifstream::pos_type size() const;
 
@@ -344,6 +414,9 @@ private:
 
     /// @brief Holds CSV file columns.
     std::vector<std::string> cols_;
+
+    /// @brief Holds last error during row reading or validation.
+    std::string read_msg_;
 };
 
 } // namespace isc::util
diff --git a/src/lib/util/tests/csv_file_unittest.cc b/src/lib/util/tests/csv_file_unittest.cc
index b22ecaa..9e0dca2 100644
--- a/src/lib/util/tests/csv_file_unittest.cc
+++ b/src/lib/util/tests/csv_file_unittest.cc
@@ -136,6 +136,7 @@ public:
     /// @param contents Contents of the file.
     void writeFile(const std::string& contents) const;
 
+    /// @brief Absolute path to the file used in the tests.
     std::string testfile_;
 
 };
@@ -190,13 +191,16 @@ CSVFileTest::writeFile(const std::string& contents) const {
     }
 }
 
-// This test checks that the file can be opened and its content
-// parsed correctly. It also checks that empty row is returned
-// when EOF is reached.
-TEST_F(CSVFileTest, open) {
+// This test checks that the file can be opened,  its whole content is
+// parsed correctly and data may be appended. It also checks that empty
+// row is returned when EOF is reached.
+TEST_F(CSVFileTest, openReadAllWrite) {
+    // Create a new CSV file that contains a header and two data rows.
     writeFile("animal,age,color\n"
               "cat,10,white\n"
               "lion,15,yellow\n");
+
+    // Open this file and check that the header is parsed.
     boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
     ASSERT_NO_THROW(csv->open());
     ASSERT_EQ(3, csv->getColumnCount());
@@ -204,33 +208,65 @@ TEST_F(CSVFileTest, open) {
     EXPECT_EQ("age", csv->getColumnName(1));
     EXPECT_EQ("color", csv->getColumnName(2));
 
-    CSVRow row(0);
-    ASSERT_NO_THROW(csv->next(row));
+    // Read first row.
+    CSVRow row;
+    ASSERT_TRUE(csv->next(row));
     ASSERT_EQ(3, row.getValuesCount());
     EXPECT_EQ("cat", row.readAt(0));
     EXPECT_EQ("10", row.readAt(1));
     EXPECT_EQ("white", row.readAt(2));
 
-    ASSERT_NO_THROW(csv->next(row));
+    // Read second row.
+    ASSERT_TRUE(csv->next(row));
     ASSERT_EQ(3, row.getValuesCount());
     EXPECT_EQ("lion", row.readAt(0));
     EXPECT_EQ("15", row.readAt(1));
     EXPECT_EQ("yellow", row.readAt(2));
 
-    ASSERT_NO_THROW(csv->next(row));
+    // There is no 3rd row, so the empty one should be returned.
+    ASSERT_TRUE(csv->next(row));
+    EXPECT_EQ(CSVFile::EMPTY_ROW(), row);
+
+    // It should be fine to read again, but again empty row should be returned.
+    ASSERT_TRUE(csv->next(row));
     EXPECT_EQ(CSVFile::EMPTY_ROW(), row);
+
+    // Now, let's try to append something to this file.
+    CSVRow row_write(3);
+    row_write.writeAt(0, "dog");
+    row_write.writeAt(1, 2);
+    row_write.writeAt(2, "blue");
+    ASSERT_NO_THROW(csv->append(row_write));
+
+    // Close the file.
+    ASSERT_NO_THROW(csv->flush());
+    csv->close();
+
+    // Check the the file contents are correct.
+    EXPECT_EQ("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow\n"
+              "dog,2,blue\n",
+              readFile());
+
+    // Any attempt to read from the file or write to it should now fail.
+    EXPECT_FALSE(csv->next(row));
+    EXPECT_THROW(csv->append(row_write), CSVFileError);
 }
 
-// This test checks that a file can be used both for reading
-// and writing. When content is appended to the end of file,
-// an attempt to read results in empty row returned.
-TEST_F(CSVFileTest, openReadWrite) {
+// This test checks that contents may be appended to a file which hasn't
+// been fully parsed/read.
+TEST_F(CSVFileTest, openReadPartialWrite) {
+    // Create a CSV file with two rows in it.
     writeFile("animal,age,color\n"
               "cat,10,white\n"
               "lion,15,yellow\n");
+
+    // Open this file.
     boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
     ASSERT_NO_THROW(csv->open());
 
+    // Read the first row.
     CSVRow row0(0);
     ASSERT_NO_THROW(csv->next(row0));
     ASSERT_EQ(3, row0.getValuesCount());
@@ -238,15 +274,31 @@ TEST_F(CSVFileTest, openReadWrite) {
     EXPECT_EQ("10", row0.readAt(1));
     EXPECT_EQ("white", row0.readAt(2));
 
+    // There is still second row to be read. But, it should be possible to
+    // skip reading it and append new row to the end of file.
     CSVRow row_write(3);
     row_write.writeAt(0, "dog");
     row_write.writeAt(1, 2);
     row_write.writeAt(2, "blue");
     ASSERT_NO_THROW(csv->append(row_write));
 
+    // At this point, the file pointer is at the end of file, so reading
+    // should return empty row.
     CSVRow row1(0);
     ASSERT_NO_THROW(csv->next(row1));
     EXPECT_EQ(CSVFile::EMPTY_ROW(), row1);
+
+    // Close the file.
+    ASSERT_NO_THROW(csv->flush());
+    csv->close();
+
+    // Check that there are two initial lines and one new there.
+    EXPECT_EQ("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow\n"
+              "dog,2,blue\n",
+              readFile());
+
 }
 
 // This test checks that the new CSV file is created and header
@@ -274,11 +326,82 @@ TEST_F(CSVFileTest, recreate) {
     row1.writeAt(2, 2);
     ASSERT_NO_THROW(csv->append(row1));
 
+    ASSERT_NO_THROW(csv->flush());
+    csv->close();
+
     EXPECT_EQ("animal,color,age,comments\n"
               "dog,grey,3,nice one\n"
               "cat,black,2,\n",
               readFile());
 }
 
+// This test checks that the error is reported when the size of the row being
+// read doesn't match the number of columns of the CSV file.
+TEST_F(CSVFileTest, validate) {
+    // Create CSV file with 2 invalid rows in it: one too long, one too short.
+    // Apart from that, there are two valid columns that should be read
+    // successfuly.
+    writeFile("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow,black\n"
+              "dog,3,green\n"
+              "elephant,11\n");
+
+    boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
+    ASSERT_NO_THROW(csv->open());
+    // First row is correct.
+    CSVRow row0;
+    ASSERT_TRUE(csv->next(row0));
+    EXPECT_EQ("cat", row0.readAt(0));
+    EXPECT_EQ("10", row0.readAt(1));
+    EXPECT_EQ("white", row0.readAt(2));
+    EXPECT_EQ("success", csv->getReadMsg());
+    // This row is too long.
+    CSVRow row1;
+    EXPECT_FALSE(csv->next(row1));
+    EXPECT_NE("success", csv->getReadMsg());
+    // This row is correct.
+    CSVRow row2;
+    ASSERT_TRUE(csv->next(row2));
+    EXPECT_EQ("dog", row2.readAt(0));
+    EXPECT_EQ("3", row2.readAt(1));
+    EXPECT_EQ("green", row2.readAt(2));
+    EXPECT_EQ("success", csv->getReadMsg());
+    // This row is too short.
+    CSVRow row3;
+    EXPECT_FALSE(csv->next(row3));
+    EXPECT_NE("success", csv->getReadMsg());
+}
+
+// Test test checks that exception is thrown when the header of the CSV file
+// parsed, doesn't match the columns specified.
+TEST_F(CSVFileTest, validateHeader) {
+    // Create CSV file with 3 columns.
+    writeFile("animal,age,color\n"
+              "cat,10,white\n"
+              "lion,15,yellow,black\n");
+
+    // Invalid order of columns.
+    boost::scoped_ptr<CSVFile> csv(new CSVFile(testfile_));
+    csv->addColumn("color");
+    csv->addColumn("animal");
+    csv->addColumn("age");
+    EXPECT_THROW(csv->open(), CSVFileError);
+
+    // Too many columns.
+    csv.reset(new CSVFile(testfile_));
+    csv->addColumn("animal");
+    csv->addColumn("age");
+    csv->addColumn("color");
+    csv->addColumn("notes");
+    EXPECT_THROW(csv->open(), CSVFileError);
+
+    // Too few columns.
+    csv.reset(new CSVFile(testfile_));
+    csv->addColumn("animal");
+    csv->addColumn("age");
+    EXPECT_THROW(csv->open(), CSVFileError);
+}
+
 
 } // end of anonymous namespace



More information about the bind10-changes mailing list