// functor.hpp
//  Copyright (c) 2001-2011 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM)
  6. #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/mpl/bool.hpp>
  11. #include <boost/detail/workaround.hpp>
  12. #include <boost/spirit/home/lex/lexer/pass_flags.hpp>
  13. #include <boost/assert.hpp>
  14. #include <iterator> // for std::iterator_traits
  15. #if 0 != __COMO_VERSION__ || !BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
  16. #define BOOST_SPIRIT_STATIC_EOF 1
  17. #define BOOST_SPIRIT_EOF_PREFIX static
  18. #else
  19. #define BOOST_SPIRIT_EOF_PREFIX
  20. #endif
  21. namespace boost { namespace spirit { namespace lex { namespace lexertl
  22. {
  23. ///////////////////////////////////////////////////////////////////////////
  24. //
  25. // functor is a template usable as the functor object for the
  26. // multi_pass iterator allowing to wrap a lexertl based dfa into a
  27. // iterator based interface.
  28. //
  29. // Token: the type of the tokens produced by this functor
  30. // this needs to expose a constructor with the following
  31. // prototype:
  32. //
  33. // Token(std::size_t id, std::size_t state,
  34. // Iterator start, Iterator end)
  35. //
  36. // where 'id' is the token id, state is the lexer state,
  37. // this token has been matched in, and 'first' and 'end'
  38. // mark the start and the end of the token with respect
  39. // to the underlying character stream.
  40. // FunctorData:
  41. // this is expected to encapsulate the shared part of the
  42. // functor (see lex/lexer/lexertl/functor_data.hpp for an
  43. // example and documentation).
  44. // Iterator: the type of the underlying iterator
  45. // SupportsActors:
  46. // this is expected to be a mpl::bool_, if mpl::true_ the
  47. // functor invokes functors which (optionally) have
  48. // been attached to the token definitions.
  49. // SupportState:
  50. // this is expected to be a mpl::bool_, if mpl::true_ the
  51. // functor supports different lexer states,
  52. // otherwise no lexer state is supported.
  53. //
  54. ///////////////////////////////////////////////////////////////////////////
  55. template <typename Token
  56. , template <typename, typename, typename, typename> class FunctorData
  57. , typename Iterator = typename Token::iterator_type
  58. , typename SupportsActors = mpl::false_
  59. , typename SupportsState = typename Token::has_state>
  60. class functor
  61. {
  62. public:
  63. typedef typename
  64. std::iterator_traits<Iterator>::value_type
  65. char_type;
  66. private:
  67. // Needed by compilers not implementing the resolution to DR45. For
  68. // reference, see
  69. // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45.
  70. typedef typename Token::token_value_type token_value_type;
  71. friend class FunctorData<Iterator, SupportsActors, SupportsState
  72. , token_value_type>;
  73. #ifdef _MSC_VER
  74. # pragma warning(push)
  75. # pragma warning(disable: 4512) // assignment operator could not be generated.
  76. #endif
  77. // Helper template allowing to assign a value on exit
  78. template <typename T>
  79. struct assign_on_exit
  80. {
  81. assign_on_exit(T& dst, T const& src)
  82. : dst_(dst), src_(src) {}
  83. ~assign_on_exit()
  84. {
  85. dst_ = src_;
  86. }
  87. T& dst_;
  88. T const& src_;
  89. };
  90. #ifdef _MSC_VER
  91. # pragma warning(pop)
  92. #endif
  93. public:
  94. functor() {}
  95. #if BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
  96. // somehow VC7.1 needs this (meaningless) assignment operator
  97. functor& operator=(functor const& rhs)
  98. {
  99. return *this;
  100. }
  101. #endif
  102. ///////////////////////////////////////////////////////////////////////
  103. // interface to the iterator_policies::split_functor_input policy
  104. typedef Token result_type;
  105. typedef functor unique;
  106. typedef FunctorData<Iterator, SupportsActors, SupportsState
  107. , token_value_type> shared;
  108. BOOST_SPIRIT_EOF_PREFIX result_type const eof;
  109. ///////////////////////////////////////////////////////////////////////
  110. typedef Iterator iterator_type;
  111. typedef typename shared::semantic_actions_type semantic_actions_type;
  112. typedef typename shared::next_token_functor next_token_functor;
  113. typedef typename shared::get_state_name_type get_state_name_type;
  114. // this is needed to wrap the semantic actions in a proper way
  115. typedef typename shared::wrap_action_type wrap_action_type;
  116. ///////////////////////////////////////////////////////////////////////
  117. template <typename MultiPass>
  118. static result_type& get_next(MultiPass& mp, result_type& result)
  119. {
  120. typedef typename result_type::id_type id_type;
  121. shared& data = mp.shared()->ftor;
  122. for(;;)
  123. {
  124. if (data.get_first() == data.get_last())
  125. #if defined(BOOST_SPIRIT_STATIC_EOF)
  126. return result = eof;
  127. #else
  128. return result = mp.ftor.eof;
  129. #endif
  130. data.reset_value();
  131. Iterator end = data.get_first();
  132. std::size_t unique_id = boost::lexer::npos;
  133. bool prev_bol = false;
  134. // lexer matching might change state
  135. std::size_t state = data.get_state();
  136. std::size_t id = data.next(end, unique_id, prev_bol);
  137. if (boost::lexer::npos == id) { // no match
  138. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  139. std::string next;
  140. Iterator it = data.get_first();
  141. for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i)
  142. next += *it;
  143. std::cerr << "Not matched, in state: " << state
  144. << ", lookahead: >" << next << "<" << std::endl;
  145. #endif
  146. return result = result_type(0);
  147. }
  148. else if (0 == id) { // EOF reached
  149. #if defined(BOOST_SPIRIT_STATIC_EOF)
  150. return result = eof;
  151. #else
  152. return result = mp.ftor.eof;
  153. #endif
  154. }
  155. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  156. {
  157. std::string next;
  158. Iterator it = end;
  159. for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i)
  160. next += *it;
  161. std::cerr << "Matched: " << id << ", in state: "
  162. << state << ", string: >"
  163. << std::basic_string<char_type>(data.get_first(), end) << "<"
  164. << ", lookahead: >" << next << "<" << std::endl;
  165. if (data.get_state() != state) {
  166. std::cerr << "Switched to state: "
  167. << data.get_state() << std::endl;
  168. }
  169. }
  170. #endif
  171. // account for a possibly pending lex::more(), i.e. moving
  172. // data.first_ back to the start of the previously matched token.
  173. bool adjusted = data.adjust_start();
  174. // set the end of the matched input sequence in the token data
  175. data.set_end(end);
  176. // invoke attached semantic actions, if defined, might change
  177. // state, id, data.first_, and/or end
  178. BOOST_SCOPED_ENUM(pass_flags) pass =
  179. data.invoke_actions(state, id, unique_id, end);
  180. if (data.has_value()) {
  181. // return matched token using the token value as set before
  182. // using data.set_value(), advancing 'data.first_' past the
  183. // matched sequence
  184. assign_on_exit<Iterator> on_exit(data.get_first(), end);
  185. return result = result_type(id_type(id), state, data.get_value());
  186. }
  187. else if (pass_flags::pass_normal == pass) {
  188. // return matched token, advancing 'data.first_' past the
  189. // matched sequence
  190. assign_on_exit<Iterator> on_exit(data.get_first(), end);
  191. return result = result_type(id_type(id), state, data.get_first(), end);
  192. }
  193. else if (pass_flags::pass_fail == pass) {
  194. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  195. std::cerr << "Matching forced to fail" << std::endl;
  196. #endif
  197. // if the data.first_ got adjusted above, revert this adjustment
  198. if (adjusted)
  199. data.revert_adjust_start();
  200. // one of the semantic actions signaled no-match
  201. data.reset_bol(prev_bol);
  202. if (state != data.get_state())
  203. continue; // retry matching if state has changed
  204. // if the state is unchanged repeating the match wouldn't
  205. // move the input forward, causing an infinite loop
  206. return result = result_type(0);
  207. }
  208. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  209. std::cerr << "Token ignored, continuing matching" << std::endl;
  210. #endif
  211. // if this token needs to be ignored, just repeat the matching,
  212. // while starting right after the current match
  213. data.get_first() = end;
  214. }
  215. }
  216. // set_state are propagated up to the iterator interface, allowing to
  217. // manipulate the current lexer state through any of the exposed
  218. // iterators.
  219. template <typename MultiPass>
  220. static std::size_t set_state(MultiPass& mp, std::size_t state)
  221. {
  222. std::size_t oldstate = mp.shared()->ftor.get_state();
  223. mp.shared()->ftor.set_state(state);
  224. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  225. std::cerr << "Switching state from: " << oldstate
  226. << " to: " << state
  227. << std::endl;
  228. #endif
  229. return oldstate;
  230. }
  231. template <typename MultiPass>
  232. static std::size_t get_state(MultiPass& mp)
  233. {
  234. return mp.shared()->ftor.get_state();
  235. }
  236. template <typename MultiPass>
  237. static std::size_t
  238. map_state(MultiPass const& mp, char_type const* statename)
  239. {
  240. return mp.shared()->ftor.get_state_id(statename);
  241. }
  242. // we don't need this, but it must be there
  243. template <typename MultiPass>
  244. static void destroy(MultiPass const&) {}
  245. };
  #if defined(BOOST_SPIRIT_STATIC_EOF)
  ///////////////////////////////////////////////////////////////////////////
  //  eof token
  //
  //  Out-of-class definition of the static data member functor<...>::eof.
  //  It is only needed when 'eof' has been declared static, i.e. when
  //  BOOST_SPIRIT_STATIC_EOF is defined. The end-of-input token is a
  //  value-initialized result_type (which is the Token type).
  ///////////////////////////////////////////////////////////////////////////
  template <typename Token
    , template <typename, typename, typename, typename> class FunctorData
    , typename Iterator, typename SupportsActors, typename SupportsState>
  typename functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::result_type const
      functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof =
          typename functor<Token, FunctorData, Iterator, SupportsActors
            , SupportsState>::result_type();
  #endif
  258. }}}}
  // The EOF helper macros are only used inside this header; undefine them
  // again so they are not visible to code including it.
  #undef BOOST_SPIRIT_EOF_PREFIX
  #undef BOOST_SPIRIT_STATIC_EOF
  261. #endif