diff options
author | Kohei Yoshida <kohei.yoshida@suse.com> | 2011-09-07 23:14:52 -0400 |
---|---|---|
committer | Kohei Yoshida <kohei.yoshida@suse.com> | 2011-09-07 23:17:22 -0400 |
commit | 7015bad5a7c767c51e7a89a005b5315669a954bc (patch) | |
tree | c8c0a72b3a773e34a420ca2569048004b4440c17 /sc/inc | |
parent | a58ed493e572fef2c503bd329e924bb062ba9c96 (diff) |
Updated css_parser from orcus, plus added experimental csv_parser.
Diffstat (limited to 'sc/inc')
-rw-r--r-- | sc/inc/orcus/css_parser.hpp | 17 | ||||
-rw-r--r-- | sc/inc/orcus/csv_parser.hpp | 280 |
2 files changed, 293 insertions, 4 deletions
diff --git a/sc/inc/orcus/css_parser.hpp b/sc/inc/orcus/css_parser.hpp index 7a1b3e51241f..c211ccaebcbc 100644 --- a/sc/inc/orcus/css_parser.hpp +++ b/sc/inc/orcus/css_parser.hpp @@ -143,7 +143,7 @@ void css_parser<_Handler>::parse() std::cout << "'" << std::endl; #endif m_handler.begin_parse(); - for (; has_char(); next()) + while (has_char()) rule(); m_handler.end_parse(); } @@ -151,7 +151,7 @@ void css_parser<_Handler>::parse() template<typename _Handler> void css_parser<_Handler>::rule() { - // <name> , ... , <name> { <properties> } + // <selector name> , ... , <selector name> <block> while (has_char()) { char c = cur_char(); @@ -201,7 +201,11 @@ void css_parser<_Handler>::at_rule_name() template<typename _Handler> void css_parser<_Handler>::selector_name() { + // <element name> + // '.' <class name> // <element name> '.' <class name> + // + // Both element and class names are identifiers. assert(has_char()); char c = cur_char(); @@ -239,6 +243,8 @@ void css_parser<_Handler>::selector_name() template<typename _Handler> void css_parser<_Handler>::property_name() { + // <identifier> + assert(has_char()); char c = cur_char(); if (!is_alpha(c) && c != '.') @@ -259,7 +265,8 @@ void css_parser<_Handler>::property_name() template<typename _Handler> void css_parser<_Handler>::property() { - // <name> : <value> , ... , <value> + // <property name> : <value> , ... , <value> + m_handler.begin_property(); property_name(); if (cur_char() != ':') @@ -286,6 +293,8 @@ void css_parser<_Handler>::property() template<typename _Handler> void css_parser<_Handler>::quoted_value() { + // Parse until the the end quote is reached. + assert(cur_char() == '"'); next(); const char* p = mp_char; @@ -373,7 +382,7 @@ void css_parser<_Handler>::property_sep() template<typename _Handler> void css_parser<_Handler>::block() { - // '{' <property> ';' ... ';' <property> '}' + // '{' <property> ';' ... ';' <property> ';'(optional) '}' assert(cur_char() == '{'); #if ORCUS_DEBUG_CSS diff --git a/sc/inc/orcus/csv_parser.hpp b/sc/inc/orcus/csv_parser.hpp new file mode 100644 index 000000000000..828a8b6cd035 --- /dev/null +++ b/sc/inc/orcus/csv_parser.hpp @@ -0,0 +1,280 @@ +/************************************************************************* + * + * Copyright (c) 2011 Kohei Yoshida + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + ************************************************************************/ + +#ifndef __ORCUS_CSV_PARSER_HPP__ +#define __ORCUS_CSV_PARSER_HPP__ + +#define ORCUS_DEBUG_CSV 0 + +#include <cstdlib> +#include <cstring> +#include <exception> +#include <string> +#include <cassert> +#include <sstream> + +#if ORCUS_DEBUG_CSV +#include <iostream> +using std::cout; +using std::endl; +#endif + +namespace orcus { + +struct csv_parser_config +{ + std::string delimiters; + char text_qualifier; + bool trim_cell_value:1; + + csv_parser_config() : + trim_cell_value(true) {} +}; + +class csv_parse_error : public std::exception +{ + std::string m_msg; +public: + csv_parse_error(const std::string& msg) : m_msg(msg) {} + virtual ~csv_parse_error() throw() {} + virtual const char* what() const throw() { return m_msg.c_str(); } +}; + +template<typename _Handler> +class csv_parser +{ +public: + typedef _Handler handler_type; + + csv_parser(const char* p, size_t n, handler_type& hdl, const csv_parser_config& config); + void parse(); + +private: + bool has_char() const { return m_pos < m_length; } + void next(); + char cur_char() const; + + bool is_delim(char c) const; + bool is_text_qualifier(char c) const; + + // handlers + void row(); + void cell(); + void quoted_cell(); + + /** + * Push cell value to the handler. + */ + void push_cell_value(const char* p, size_t n); + + static bool is_blank(char c) + { + return c == ' ' || c == '\t'; + } + +private: + handler_type& m_handler; + const csv_parser_config& m_config; + const char* mp_char; + size_t m_pos; + size_t m_length; +}; + +template<typename _Handler> +csv_parser<_Handler>::csv_parser(const char* p, size_t n, handler_type& hdl, const csv_parser_config& config) : + m_handler(hdl), m_config(config), mp_char(p), m_pos(0), m_length(n) {} + +template<typename _Handler> +void csv_parser<_Handler>::parse() +{ +#if ORCUS_DEBUG_CSV + const char* p = mp_char; + for (size_t i = m_pos; i < m_length; ++i, ++p) + std::cout << *p; + std::cout << std::endl; +#endif + + m_handler.begin_parse(); + while (has_char()) + row(); + m_handler.end_parse(); +} + +template<typename _Handler> +void csv_parser<_Handler>::next() +{ + ++m_pos; + ++mp_char; +} + +template<typename _Handler> +char csv_parser<_Handler>::cur_char() const +{ + return *mp_char; +} + +template<typename _Handler> +bool csv_parser<_Handler>::is_delim(char c) const +{ + return m_config.delimiters.find(c) != std::string::npos; +} + +template<typename _Handler> +bool csv_parser<_Handler>::is_text_qualifier(char c) const +{ + return m_config.text_qualifier == c; +} + +template<typename _Handler> +void csv_parser<_Handler>::row() +{ + m_handler.begin_row(); + while (true) + { + if (is_text_qualifier(cur_char())) + quoted_cell(); + else + cell(); + + if (!has_char()) + { + m_handler.end_row(); + return; + } + + char c = cur_char(); + if (c == '\n') + { + next(); +#if ORCUS_DEBUG_CSV + cout << "(LF)" << endl; +#endif + m_handler.end_row(); + return; + } + + assert(is_delim(c)); + next(); + } +} + +template<typename _Handler> +void csv_parser<_Handler>::cell() +{ + const char* p = mp_char; + size_t len = 0; + char c = cur_char(); + while (c != '\n' && !is_delim(c)) + { + ++len; + next(); + if (!has_char()) + break; + c = cur_char(); + } + + if (!len) + p = NULL; + + push_cell_value(p, len); +} + +template<typename _Handler> +void csv_parser<_Handler>::quoted_cell() +{ + char c = cur_char(); + assert(is_text_qualifier(c)); + next(); // Skip the opening quote. + if (!has_char()) + return; + + const char* p = mp_char; + size_t len = 0; + for (c = cur_char(); !is_text_qualifier(c); c = cur_char()) + { + ++len; + next(); + if (!has_char()) + { + // Stream ended prematurely. Handle it gracefully. + push_cell_value(p, len); + return; + } + } + + assert(is_text_qualifier(c)); + next(); // Skip the closing quote. + c = cur_char(); + if (!is_delim(c)) + { + std::ostringstream os; + os << "A quoted cell value must be immediately followed by a delimiter. "; + os << "'" << c << "' is found instead."; + throw csv_parse_error(os.str()); + } + + if (!len) + p = NULL; + + push_cell_value(p, len); +} + +template<typename _Handler> +void csv_parser<_Handler>::push_cell_value(const char* p, size_t n) +{ + size_t len = n; + + if (m_config.trim_cell_value) + { + // Trim any leading blanks. + for (size_t i = 0; i < n; ++i, --len, ++p) + { + if (!is_blank(*p)) + break; + } + + // Trim any trailing blanks. + if (len) + { + const char* p_end = p + (len-1); + for (; p != p_end; --p_end, --len) + { + if (!is_blank(*p_end)) + break; + } + } + } + + m_handler.cell(p, len); +#if ORCUS_DEBUG_CSV + cout << "(cell:'" << std::string(p, len) << "')"; +#endif +} + +} + +#endif |