- // Copyright (c) 2001-2011 Hartmut Kaiser
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM)
- #define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM
- #if defined(_MSC_VER)
- #pragma once
- #endif
- #include <iosfwd>
- #include <boost/spirit/home/support/detail/lexer/generator.hpp>
- #include <boost/spirit/home/support/detail/lexer/rules.hpp>
- #include <boost/spirit/home/support/detail/lexer/consts.hpp>
- #include <boost/spirit/home/support/unused.hpp>
- #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
- #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
- #include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp>
- #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
- #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
- #include <boost/spirit/home/support/detail/lexer/debug.hpp>
- #endif
- #include <iterator> // for std::iterator_traits
- namespace boost { namespace spirit { namespace lex { namespace lexertl
- {
- ///////////////////////////////////////////////////////////////////////////
- namespace detail
- {
- ///////////////////////////////////////////////////////////////////////
- // The must_escape function checks if the given character value needs
- // to be preceded by a backslash character to disable its special
- // meaning in the context of a regular expression
- ///////////////////////////////////////////////////////////////////////
- template <typename Char>
- inline bool must_escape(Char c)
- {
- // FIXME: more needed?
- switch (c) {
- case '+': case '/': case '*': case '?':
- case '|':
- case '(': case ')':
- case '[': case ']':
- case '{': case '}':
- case '.':
- case '^': case '$':
- case '\\':
- case '"':
- return true;
- default:
- break;
- }
- return false;
- }
- ///////////////////////////////////////////////////////////////////////
- // The escape function returns the string representation of the given
- // character value, escaped with a backslash if necessary, so that it
- // can be used safely in a regular expression definition.
- ///////////////////////////////////////////////////////////////////////
- template <typename Char>
- inline std::basic_string<Char> escape(Char ch)
- {
- std::basic_string<Char> result(1, ch);
- if (detail::must_escape(ch))
- {
- typedef typename std::basic_string<Char>::size_type size_type;
- result.insert((size_type)0, 1, '\\');
- }
- return result;
- }
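- // For illustration: given the table in must_escape() above, escape()
- // prefixes regex metacharacters with a backslash and leaves all other
- // characters untouched, e.g.:
- //
- //     detail::escape('*') == "\\*"   // '*' has special meaning
- //     detail::escape('a') == "a"     // ordinary character, unchanged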
- ///////////////////////////////////////////////////////////////////////
- // The map_flags function translates the Spirit.Lex match_flags values
- // into the corresponding lexertl regex_flags.
- ///////////////////////////////////////////////////////////////////////
- inline boost::lexer::regex_flags map_flags(unsigned int flags)
- {
- unsigned int retval = boost::lexer::none;
- if (flags & match_flags::match_not_dot_newline)
- retval |= boost::lexer::dot_not_newline;
- if (flags & match_flags::match_icase)
- retval |= boost::lexer::icase;
- return boost::lexer::regex_flags(retval);
- }
- }
- ///////////////////////////////////////////////////////////////////////////
- template <typename Lexer, typename F>
- bool generate_static(Lexer const&
- , std::basic_ostream<typename Lexer::char_type>&
- , typename Lexer::char_type const*, F);
- ///////////////////////////////////////////////////////////////////////////
- //
- // Every lexer type to be used as a lexer for Spirit has to conform to
- // the following public interface:
- //
- // typedefs:
- // iterator_type The type of the iterator exposed by this lexer.
- // token_type The type of the tokens returned from the exposed
- // iterators.
- //
- // functions:
- // default constructor
- // Since lexers are only ever instantiated as base
- // classes, it might be a good idea to make this
- // constructor protected.
- // begin, end Return a pair of iterators which, when dereferenced,
- // yield the sequence of tokens recognized in the
- // input stream given as the parameters to the
- // begin() function.
- // add_token Should add the definition of a token to be
- // recognized by this lexer.
- // clear Should delete all current token definitions
- // associated with the given state of this lexer
- // object.
- //
- // template parameters:
- // Iterator The type of the iterator used to access the
- // underlying character stream.
- // Token The type of the tokens to be returned from the
- // exposed token iterator.
- // Functor The type of the InputPolicy to use to instantiate
- // the multi_pass iterator type to be used as the
- // token iterator (returned from begin()/end()).
- //
- ///////////////////////////////////////////////////////////////////////////
- ///////////////////////////////////////////////////////////////////////////
- //
- // The lexer class is an implementation of a Spirit.Lex lexer on
- // top of Ben Hanson's lexertl library as outlined above (for more
- // information about lexertl see http://www.benhanson.net/lexertl.html).
- //
- // This class is meant to be used as the first and only template
- // parameter when instantiating a lex::lexer class.
- //
- ///////////////////////////////////////////////////////////////////////////
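- //
- // A minimal usage sketch (illustrative only; the token ids ID_INTEGER
- // and ID_IDENTIFIER are assumed to be defined elsewhere):
- //
- //     typedef lex::lexertl::token<char const*> token_type;
- //     typedef lex::lexertl::lexer<token_type> lexer_type;
- //
- //     template <typename Lexer>
- //     struct my_tokens : lex::lexer<Lexer>
- //     {
- //         my_tokens()
- //         {
- //             this->self.add
- //                 ("[0-9]+", ID_INTEGER)
- //                 ("[a-zA-Z_]+", ID_IDENTIFIER)
- //             ;
- //         }
- //     };
- //
- //     my_tokens<lexer_type> tokens;
- //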
- template <typename Token = token<>
- , typename Iterator = typename Token::iterator_type
- , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
- class lexer
- {
- private:
- struct dummy { void true_() {} };
- typedef void (dummy::*safe_bool)();
- static std::size_t const all_states_id = static_cast<std::size_t>(-2);
- public:
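- // Conversion to a member-pointer type (safe-bool idiom): the lexer
- // object evaluates to true in boolean contexts once its DFA has been
- // successfully built (see init_dfa() below), while avoiding unwanted
- // implicit conversions to integral types.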
- operator safe_bool() const
- { return initialized_dfa_ ? &dummy::true_ : 0; }
- typedef typename std::iterator_traits<Iterator>::value_type char_type;
- typedef std::basic_string<char_type> string_type;
- typedef boost::lexer::basic_rules<char_type> basic_rules_type;
- // Every lexer type to be used as a lexer for Spirit has to conform to
- // a public interface.
- typedef Token token_type;
- typedef typename Token::id_type id_type;
- typedef iterator<Functor> iterator_type;
- private:
- #ifdef _MSC_VER
- # pragma warning(push)
- # pragma warning(disable: 4512) // assignment operator could not be generated.
- #endif
- // this type is purely used for the iterator_type construction below
- struct iterator_data_type
- {
- typedef typename Functor::semantic_actions_type semantic_actions_type;
- iterator_data_type(
- boost::lexer::basic_state_machine<char_type> const& sm
- , boost::lexer::basic_rules<char_type> const& rules
- , semantic_actions_type const& actions)
- : state_machine_(sm), rules_(rules), actions_(actions)
- {}
- boost::lexer::basic_state_machine<char_type> const& state_machine_;
- boost::lexer::basic_rules<char_type> const& rules_;
- semantic_actions_type const& actions_;
- };
- #ifdef _MSC_VER
- # pragma warning(pop)
- #endif
- public:
- // Return the start iterator usable for iterating over the generated
- // tokens.
- iterator_type begin(Iterator& first, Iterator const& last
- , char_type const* initial_state = 0) const
- {
- if (!init_dfa()) // never minimize DFA for dynamic lexers
- return iterator_type();
- iterator_data_type iterator_data(state_machine_, rules_, actions_);
- return iterator_type(iterator_data, first, last, initial_state);
- }
- // Return the end iterator usable to stop iterating over the generated
- // tokens.
- iterator_type end() const
- {
- return iterator_type();
- }
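- // Illustrative sketch of driving the token iteration (the names lxr,
- // first, and last are made up; in real code begin()/end() are normally
- // invoked through the enclosing lex::lexer<> instance):
- //
- //     iterator_type iter = lxr.begin(first, last);
- //     iterator_type last_tok = lxr.end();
- //     while (iter != last_tok && token_is_valid(*iter)) {
- //         // iter->id() yields the id of the matched token
- //         ++iter;
- //     }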
- protected:
- // Lexer instances can be created by means of a derived class only.
- lexer(unsigned int flags)
- : flags_(detail::map_flags(flags))
- , rules_(flags_)
- , initialized_dfa_(false)
- {}
- public:
- // interface for token definition management
- std::size_t add_token(char_type const* state, char_type tokendef,
- std::size_t token_id, char_type const* targetstate)
- {
- add_state(state);
- initialized_dfa_ = false;
- if (state == all_states())
- return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());
- if (0 == targetstate)
- targetstate = state;
- else
- add_state(targetstate);
- return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
- }
- std::size_t add_token(char_type const* state, string_type const& tokendef,
- std::size_t token_id, char_type const* targetstate)
- {
- add_state(state);
- initialized_dfa_ = false;
- if (state == all_states())
- return rules_.add(state, tokendef, token_id, rules_.dot());
- if (0 == targetstate)
- targetstate = state;
- else
- add_state(targetstate);
- return rules_.add(state, tokendef, token_id, targetstate);
- }
- // interface for pattern definition management
- void add_pattern (char_type const* state, string_type const& name,
- string_type const& patterndef)
- {
- add_state(state);
- rules_.add_macro(name.c_str(), patterndef);
- initialized_dfa_ = false;
- }
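- // Illustrative sketch: defining a named pattern and a token which
- // refers to it (assuming "INITIAL" is the initial lexer state and
- // ID_NUMBER is a user-defined token id; passing 0 as the target state
- // leaves the lexer in the current state):
- //
- //     lxr.add_pattern("INITIAL", "DIGITS", "[0-9]+");
- //     lxr.add_token("INITIAL", "{DIGITS}", ID_NUMBER, 0);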
- boost::lexer::rules const& get_rules() const { return rules_; }
- void clear(char_type const* state)
- {
- std::size_t s = rules_.state(state);
- if (boost::lexer::npos != s)
- rules_.clear(state);
- initialized_dfa_ = false;
- }
- std::size_t add_state(char_type const* state)
- {
- if (state == all_states())
- return all_states_id;
- std::size_t stateid = rules_.state(state);
- if (boost::lexer::npos == stateid) {
- stateid = rules_.add_state(state);
- initialized_dfa_ = false;
- }
- return stateid;
- }
- string_type initial_state() const
- {
- return string_type(rules_.initial());
- }
- string_type all_states() const
- {
- return string_type(rules_.all_states());
- }
- // Register a semantic action with the given id
- template <typename F>
- void add_action(std::size_t unique_id, std::size_t state, F act)
- {
- // If you see an error here stating that add_action is not a member of
- // fusion::unused_type, then you have probably attached semantic actions
- // to at least one token in the lexer definition without using
- // lex::lexertl::actor_lexer<> as its base class.
- typedef typename Functor::wrap_action_type wrapper_type;
- if (state == all_states_id) {
- // add the action to all known states
- typedef typename
- basic_rules_type::string_size_t_map::const_iterator
- state_iterator;
- std::size_t states = rules_.statemap().size();
- for (state_iterator it = rules_.statemap().begin(),
- end = rules_.statemap().end(); it != end; ++it) {
- for (std::size_t j = 0; j < states; ++j)
- actions_.add_action(unique_id + j, it->second, wrapper_type::call(act));
- }
- }
- else {
- actions_.add_action(unique_id, state, wrapper_type::call(act));
- }
- }
- // template <typename F>
- // void add_action(std::size_t unique_id, char_type const* state, F act)
- // {
- // typedef typename Functor::wrap_action_type wrapper_type;
- // actions_.add_action(unique_id, add_state(state), wrapper_type::call(act));
- // }
- // We do not minimize the state machine by default anymore because
- // Ben said: "If you can afford to generate a lexer at runtime, there
- // is little point in calling minimise."
- // Go figure.
- bool init_dfa(bool minimize = false) const
- {
- if (!initialized_dfa_) {
- state_machine_.clear();
- typedef boost::lexer::basic_generator<char_type> generator;
- generator::build (rules_, state_machine_);
- if (minimize)
- generator::minimise (state_machine_);
- #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
- boost::lexer::debug::dump(state_machine_, std::cerr);
- #endif
- initialized_dfa_ = true;
- // // release memory held by rules description
- // basic_rules_type rules;
- // rules.init_state_info(rules_); // preserve states
- // std::swap(rules, rules_);
- }
- return true;
- }
- private:
- // lexertl specific data
- mutable boost::lexer::basic_state_machine<char_type> state_machine_;
- boost::lexer::regex_flags flags_;
- /*mutable*/ basic_rules_type rules_;
- typename Functor::semantic_actions_type actions_;
- mutable bool initialized_dfa_;
- // generator functions must be able to access members directly
- template <typename Lexer, typename F>
- friend bool generate_static(Lexer const&
- , std::basic_ostream<typename Lexer::char_type>&
- , typename Lexer::char_type const*, F);
- };
- ///////////////////////////////////////////////////////////////////////////
- //
- // The actor_lexer class is another implementation of a Spirit.Lex
- // lexer on top of Ben Hanson's lexertl library as outlined above (for
- // more information about lexertl see
- // http://www.benhanson.net/lexertl.html).
- //
- // The only difference from the lexer class above is that token_def
- // definitions may have semantic (lexer) actions attached while being
- // defined:
- //
- // int w;
- // token_def word = "[^ \t\n]+";
- // self = word[++ref(w)]; // see example: word_count_lexer
- //
- // This class is supposed to be used as the first and only template
- // parameter while instantiating instances of a lex::lexer class.
- //
- ///////////////////////////////////////////////////////////////////////////
- template <typename Token = token<>
- , typename Iterator = typename Token::iterator_type
- , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
- class actor_lexer : public lexer<Token, Iterator, Functor>
- {
- protected:
- // Lexer instances can be created by means of a derived class only.
- actor_lexer(unsigned int flags)
- : lexer<Token, Iterator, Functor>(flags) {}
- };
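- // A minimal sketch of attaching a semantic action to a token definition
- // (illustrative only, loosely following the word_count_lexer example
- // mentioned above; the member names w and word are made up):
- //
- //     template <typename Lexer>
- //     struct word_count_tokens : lex::lexer<Lexer>
- //     {
- //         word_count_tokens() : w(0), word("[^ \t\n]+")
- //         {
- //             using boost::phoenix::ref;
- //             this->self = word[++ref(w)];    // count words as a side effect
- //         }
- //         std::size_t w;
- //         lex::token_def<> word;
- //     };
- //
- //     typedef lex::lexertl::token<char const*> token_type;
- //     word_count_tokens<lex::lexertl::actor_lexer<token_type> > wc;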
- }}}}
- #endif