static_lexer.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEX_STATIC_LEXER_FEB_10_2008_0753PM)
  6. #define BOOST_SPIRIT_LEX_STATIC_LEXER_FEB_10_2008_0753PM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
  11. #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
  12. #include <boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp>
  13. #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
  14. #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
  15. #if defined(BOOST_SPIRIT_DEBUG)
  16. #include <boost/spirit/home/support/detail/lexer/debug.hpp>
  17. #endif
  18. #include <iterator> // for std::iterator_traits
  19. namespace boost { namespace spirit { namespace lex { namespace lexertl
  20. {
  ///////////////////////////////////////////////////////////////////////////
  //  Forward declaration of static_::lexer. It is only named in this file
  //  (as the default argument for the LexerTables template parameter of the
  //  lexer classes below); no definition is given here.
  ///////////////////////////////////////////////////////////////////////////
  namespace static_
  {
      struct lexer;
  }
  ///////////////////////////////////////////////////////////////////////////
  //
  //  Every lexer type to be used as a lexer for Spirit has to conform to
  //  the following public interface:
  //
  //    typedefs:
  //        iterator_type   The type of the iterator exposed by this lexer.
  //        token_type      The type of the tokens returned from the exposed
  //                        iterators.
  //
  //    functions:
  //        default constructor
  //                        Since lexers are instantiated as base classes
  //                        only, it might be a good idea to make this
  //                        constructor protected.
  //        begin, end      Return a pair of iterators which, when
  //                        dereferenced, return the sequence of tokens
  //                        recognized in the input stream given as the
  //                        parameters to the begin() function.
  //        add_token       Should add the definition of a token to be
  //                        recognized by this lexer.
  //        clear           Should delete all current token definitions
  //                        associated with the given state of this lexer
  //                        object.
  //
  //    template parameters:
  //        Token           The type of the tokens to be returned from the
  //                        exposed token iterator.
  //        LexerTables     See explanations below.
  //        Iterator        The type of the iterator used to access the
  //                        underlying character stream.
  //        Functor         The type of the InputPolicy to use to instantiate
  //                        the multi_pass iterator type to be used as the
  //                        token iterator (returned from begin()/end()).
  //
  //  Additionally, this implementation of a static lexer has a template
  //  parameter LexerTables which allows customizing the static lexer tables
  //  to be used. LexerTables is expected to be a type exposing the
  //  following functions:
  //
  //        static std::size_t const state_count()
  //
  //                This function needs to return the number of lexer states
  //                contained in the table returned from the state_names()
  //                function.
  //
  //        static char const* const* state_names()
  //
  //                This function needs to return a pointer to a table of
  //                names of all lexer states. The table needs to have as
  //                many entries as the state_count() function returns.
  //
  //        template<typename Iterator>
  //        std::size_t next(std::size_t &start_state_, Iterator const& start_
  //          , Iterator &start_token_, Iterator const& end_
  //          , std::size_t& unique_id_);
  //
  //                This function is expected to return the next matched
  //                token from the underlying input stream.
  //
  //  NOTE(review): the static_lexer class in this file refers to
  //  LexerTables::state_name, LexerTables::static_version and
  //  LexerTables::supports_bol, none of which are listed above -- this
  //  list may be out of date; confirm against the generated tables.
  //
  ///////////////////////////////////////////////////////////////////////////
  89. ///////////////////////////////////////////////////////////////////////////
  90. //
  91. // The static_lexer class is a implementation of a Spirit.Lex
  92. // lexer on top of Ben Hanson's lexertl library (For more information
  93. // about lexertl go here: http://www.benhanson.net/lexertl.html).
  94. //
  95. // This class is designed to be used in conjunction with a generated,
  96. // static lexer. For more information see the documentation (The Static
  97. // Lexer Model).
  98. //
  99. // This class is supposed to be used as the first and only template
  100. // parameter while instantiating instances of a lex::lexer class.
  101. //
  102. ///////////////////////////////////////////////////////////////////////////
  103. template <typename Token = token<>
  104. , typename LexerTables = static_::lexer
  105. , typename Iterator = typename Token::iterator_type
  106. , typename Functor = functor<Token, detail::static_data, Iterator> >
  107. class static_lexer
  108. {
  109. private:
  110. struct dummy { void true_() {} };
  111. typedef void (dummy::*safe_bool)();
  112. public:
  113. // object is always valid
  114. operator safe_bool() const { return &dummy::true_; }
  115. typedef typename std::iterator_traits<Iterator>::value_type char_type;
  116. typedef std::basic_string<char_type> string_type;
  117. // Every lexer type to be used as a lexer for Spirit has to conform to
  118. // a public interface
  119. typedef Token token_type;
  120. typedef typename Token::id_type id_type;
  121. typedef iterator<Functor> iterator_type;
  122. private:
  123. #ifdef _MSC_VER
  124. # pragma warning(push)
  125. # pragma warning(disable: 4512) // assignment operator could not be generated.
  126. #endif
  127. // this type is purely used for the iterator_type construction below
  128. struct iterator_data_type
  129. {
  130. typedef typename Functor::next_token_functor next_token_functor;
  131. typedef typename Functor::semantic_actions_type semantic_actions_type;
  132. typedef typename Functor::get_state_name_type get_state_name_type;
  133. iterator_data_type(next_token_functor next
  134. , semantic_actions_type const& actions
  135. , get_state_name_type get_state_name, std::size_t num_states
  136. , bool bol)
  137. : next_(next), actions_(actions), get_state_name_(get_state_name)
  138. , num_states_(num_states), bol_(bol)
  139. {}
  140. next_token_functor next_;
  141. semantic_actions_type const& actions_;
  142. get_state_name_type get_state_name_;
  143. std::size_t num_states_;
  144. bool bol_;
  145. };
  146. #ifdef _MSC_VER
  147. # pragma warning(pop)
  148. #endif
  149. typedef LexerTables tables_type;
  150. // The following static assertion fires if the referenced static lexer
  151. // tables are generated by a different static lexer version as used for
  152. // the current compilation unit. Please regenerate your static lexer
  153. // tables before trying to create a static_lexer<> instance.
  154. BOOST_SPIRIT_ASSERT_MSG(
  155. tables_type::static_version == SPIRIT_STATIC_LEXER_VERSION
  156. , incompatible_static_lexer_version, (LexerTables));
  157. public:
  158. // Return the start iterator usable for iterating over the generated
  159. // tokens, the generated function next_token(...) is called to match
  160. // the next token from the input.
  161. template <typename Iterator_>
  162. iterator_type begin(Iterator_& first, Iterator_ const& last
  163. , char_type const* initial_state = 0) const
  164. {
  165. iterator_data_type iterator_data(
  166. &tables_type::template next<Iterator_>, actions_
  167. , &tables_type::state_name, tables_type::state_count()
  168. , tables_type::supports_bol
  169. );
  170. return iterator_type(iterator_data, first, last, initial_state);
  171. }
  172. // Return the end iterator usable to stop iterating over the generated
  173. // tokens.
  174. iterator_type end() const
  175. {
  176. return iterator_type();
  177. }
  178. protected:
  179. // Lexer instances can be created by means of a derived class only.
  180. static_lexer(unsigned int) : unique_id_(0) {}
  181. public:
  182. // interface for token definition management
  183. std::size_t add_token (char_type const*, char_type, std::size_t
  184. , char_type const*)
  185. {
  186. return unique_id_++;
  187. }
  188. std::size_t add_token (char_type const*, string_type const&
  189. , std::size_t, char_type const*)
  190. {
  191. return unique_id_++;
  192. }
  193. // interface for pattern definition management
  194. void add_pattern (char_type const*, string_type const&
  195. , string_type const&) {}
  196. void clear(char_type const*) {}
  197. std::size_t add_state(char_type const* state)
  198. {
  199. return detail::get_state_id(state, &tables_type::state_name
  200. , tables_type::state_count());
  201. }
  202. string_type initial_state() const
  203. {
  204. return tables_type::state_name(0);
  205. }
  206. // register a semantic action with the given id
  207. template <typename F>
  208. void add_action(id_type unique_id, std::size_t state, F act)
  209. {
  210. typedef typename Functor::wrap_action_type wrapper_type;
  211. actions_.add_action(unique_id, state, wrapper_type::call(act));
  212. }
  213. bool init_dfa(bool /*minimize*/ = false) const { return true; }
  214. private:
  215. typename Functor::semantic_actions_type actions_;
  216. std::size_t unique_id_;
  217. };
  218. ///////////////////////////////////////////////////////////////////////////
  219. //
  220. // The static_actor_lexer class is another implementation of a
  221. // Spirit.Lex lexer on top of Ben Hanson's lexertl library as outlined
  222. // above (For more information about lexertl go here:
  223. // http://www.benhanson.net/lexertl.html).
  224. //
  225. // Just as the static_lexer class it is meant to be used with
  226. // a statically generated lexer as outlined above.
  227. //
  228. // The only difference to the static_lexer class above is that
  229. // token_def definitions may have semantic (lexer) actions attached while
  230. // being defined:
  231. //
  232. // int w;
  233. // token_def<> word = "[^ \t\n]+";
  234. // self = word[++ref(w)]; // see example: word_count_lexer
  235. //
  236. // This class is supposed to be used as the first and only template
  237. // parameter while instantiating instances of a lex::lexer class.
  238. //
  239. ///////////////////////////////////////////////////////////////////////////
  240. template <typename Token = token<>
  241. , typename LexerTables = static_::lexer
  242. , typename Iterator = typename Token::iterator_type
  243. , typename Functor
  244. = functor<Token, detail::static_data, Iterator, mpl::true_> >
  245. class static_actor_lexer
  246. : public static_lexer<Token, LexerTables, Iterator, Functor>
  247. {
  248. protected:
  249. // Lexer instances can be created by means of a derived class only.
  250. static_actor_lexer(unsigned int flags)
  251. : static_lexer<Token, LexerTables, Iterator, Functor>(flags)
  252. {}
  253. };
  254. }}}}
  255. #endif