tokenize_and_parse.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM)
  6. #define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/spirit/home/qi/skip_over.hpp>
  11. #include <boost/spirit/home/qi/parse.hpp>
  12. #include <boost/spirit/home/qi/nonterminal/grammar.hpp>
  13. #include <boost/spirit/home/support/unused.hpp>
  14. #include <boost/spirit/home/lex/lexer.hpp>
  15. #include <boost/mpl/assert.hpp>
  16. namespace boost { namespace phoenix
  17. {
  18. template <typename Expr>
  19. struct actor;
  20. }}
  21. namespace boost { namespace spirit { namespace lex
  22. {
  23. ///////////////////////////////////////////////////////////////////////////
  24. // Import skip_flag enumerator type from Qi namespace
  25. using qi::skip_flag;
  26. ///////////////////////////////////////////////////////////////////////////
  27. //
  28. // The tokenize_and_parse() function is one of the main Spirit API
  29. // functions. It simplifies using a lexer as the underlying token source
  30. // while parsing a given input sequence.
  31. //
  32. // The function takes a pair of iterators spanning the underlying input
  33. // stream to parse, the lexer object (built from the token definitions)
  34. // and a parser object (built from the parser grammar definition).
  35. //
  36. // The second version of this function additionally takes an attribute to
  37. // be used as the top level data structure instance the parser should use
  38. // to store the recognized input to.
  39. //
  40. // The function returns true if the parsing succeeded (the given input
  41. // sequence has been successfully matched by the given grammar).
  42. //
  43. // first, last: The pair of iterators spanning the underlying input
  44. // sequence to parse. These iterators must at least
  45. // conform to the requirements of the std::input_iterator
  46. // category.
  47. // On exit the iterator 'first' will be updated to the
  48. // position right after the last successfully matched
  49. // token.
  50. // lex: The lexer object (encoding the token definitions) to be
  51. // used to convert the input sequence into a sequence of
  52. // tokens. This token sequence is passed to the parsing
  53. // process. The LexerExpr type must conform to the
  54. // lexer interface described in the corresponding section
  55. // of the documentation.
  56. // xpr: The grammar object (encoding the parser grammar) to be
  57. // used to match the token sequence generated by the lex
  58. // object instance. The ParserExpr type must conform to
  59. // the grammar interface described in the corresponding
  60. // section of the documentation.
  61. // attr: The top level attribute passed to the parser. It will
  62. // be populated during the parsing of the input sequence.
  63. // On exit it will hold the 'parser result' corresponding
  64. // to the matched input sequence.
  65. //
  66. ///////////////////////////////////////////////////////////////////////////
  67. template <typename Iterator, typename Lexer, typename ParserExpr>
  68. inline bool
  69. tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex,
  70. ParserExpr const& xpr)
  71. {
  72. // Report invalid expression error as early as possible.
  73. // If you got an error_invalid_expression error message here,
  74. // then the expression (expr) is not a valid spirit qi expression.
  75. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  76. typename Lexer::iterator_type iter = lex.begin(first, last);
  77. return compile<qi::domain>(xpr).parse(
  78. iter, lex.end(), unused, unused, unused);
  79. }
  80. ///////////////////////////////////////////////////////////////////////////
  81. template <typename Iterator, typename Lexer, typename ParserExpr
  82. , typename Attribute>
  83. inline bool
  84. tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex
  85. , ParserExpr const& xpr, Attribute& attr)
  86. {
  87. // Report invalid expression error as early as possible.
  88. // If you got an error_invalid_expression error message here,
  89. // then the expression (expr) is not a valid spirit qi expression.
  90. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  91. typename Lexer::iterator_type iter = lex.begin(first, last);
  92. return compile<qi::domain>(xpr).parse(
  93. iter, lex.end(), unused, unused, attr);
  94. }
  95. ///////////////////////////////////////////////////////////////////////////
  96. //
  97. // The tokenize_and_phrase_parse() function is one of the main Spirit API
  98. // functions. It simplifies using a lexer as the underlying token source
  99. // while phrase parsing a given input sequence.
  100. //
  101. // The function takes a pair of iterators spanning the underlying input
  102. // stream to parse, the lexer object (built from the token definitions)
  103. // and a parser object (built from the parser grammar definition). The
  104. // additional skipper parameter will be used as the skip parser during
  105. // the parsing process.
  106. //
  107. // The second version of this function additionally takes an attribute to
  108. // be used as the top level data structure instance the parser should use
  109. // to store the recognized input to.
  110. //
  111. // The function returns true if the parsing succeeded (the given input
  112. // sequence has been successfully matched by the given grammar).
  113. //
  114. // first, last: The pair of iterators spanning the underlying input
  115. // sequence to parse. These iterators must at least
  116. // conform to the requirements of the std::input_iterator
  117. // category.
  118. // On exit the iterator 'first' will be updated to the
  119. // position right after the last successfully matched
  120. // token.
  121. // lex: The lexer object (encoding the token definitions) to be
  122. // used to convert the input sequence into a sequence of
  123. // tokens. This token sequence is passed to the parsing
  124. // process. The LexerExpr type must conform to the
  125. // lexer interface described in the corresponding section
  126. // of the documentation.
  127. // xpr: The grammar object (encoding the parser grammar) to be
  128. // used to match the token sequence generated by the lex
  129. // object instance. The ParserExpr type must conform to
  130. // the grammar interface described in the corresponding
  131. // section of the documentation.
  132. // skipper: The skip parser to be used while parsing the given
  133. // input sequence. Note, the skip parser will have to
  134. // act on the same token sequence as the main parser
  135. // 'xpr'.
  136. // post_skip: The post_skip flag controls whether the function will
  137. // invoke an additional post skip after the main parser
  138. // returned.
  139. // attr: The top level attribute passed to the parser. It will
  140. // be populated during the parsing of the input sequence.
  141. // On exit it will hold the 'parser result' corresponding
  142. // to the matched input sequence.
  143. //
  144. ///////////////////////////////////////////////////////////////////////////
  145. template <typename Iterator, typename Lexer, typename ParserExpr
  146. , typename Skipper>
  147. inline bool
  148. tokenize_and_phrase_parse(Iterator& first, Iterator last
  149. , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
  150. , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip)
  151. {
  152. // Report invalid expression error as early as possible.
  153. // If you got an error_invalid_expression error message here,
  154. // then the expression (expr) is not a valid spirit qi expression.
  155. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  156. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
  157. typedef
  158. typename spirit::result_of::compile<qi::domain, Skipper>::type
  159. skipper_type;
  160. skipper_type const skipper_ = compile<qi::domain>(skipper);
  161. typename Lexer::iterator_type iter = lex.begin(first, last);
  162. typename Lexer::iterator_type end = lex.end();
  163. if (!compile<qi::domain>(xpr).parse(
  164. iter, end, unused, skipper_, unused))
  165. return false;
  166. // do a final post-skip
  167. if (post_skip == skip_flag::postskip)
  168. qi::skip_over(iter, end, skipper_);
  169. return true;
  170. }
  171. template <typename Iterator, typename Lexer, typename ParserExpr
  172. , typename Skipper, typename Attribute>
  173. inline bool
  174. tokenize_and_phrase_parse(Iterator& first, Iterator last
  175. , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
  176. , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr)
  177. {
  178. // Report invalid expression error as early as possible.
  179. // If you got an error_invalid_expression error message here,
  180. // then the expression (expr) is not a valid spirit qi expression.
  181. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  182. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
  183. typedef
  184. typename spirit::result_of::compile<qi::domain, Skipper>::type
  185. skipper_type;
  186. skipper_type const skipper_ = compile<qi::domain>(skipper);
  187. typename Lexer::iterator_type iter = lex.begin(first, last);
  188. typename Lexer::iterator_type end = lex.end();
  189. if (!compile<qi::domain>(xpr).parse(
  190. iter, end, unused, skipper_, attr))
  191. return false;
  192. // do a final post-skip
  193. if (post_skip == skip_flag::postskip)
  194. qi::skip_over(iter, end, skipper_);
  195. return true;
  196. }
  197. ///////////////////////////////////////////////////////////////////////////
  198. template <typename Iterator, typename Lexer, typename ParserExpr
  199. , typename Skipper, typename Attribute>
  200. inline bool
  201. tokenize_and_phrase_parse(Iterator& first, Iterator last
  202. , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
  203. , Attribute& attr)
  204. {
  205. return tokenize_and_phrase_parse(first, last, lex, xpr, skipper
  206. , skip_flag::postskip, attr);
  207. }
  208. ///////////////////////////////////////////////////////////////////////////
  209. //
  210. // The tokenize() function is one of the main Spirit API functions. It
  211. // simplifies using a lexer to tokenize a given input sequence. Its main
  212. // purpose is to use the lexer to tokenize all the input.
  213. //
  214. // The second version below discards all generated tokens afterwards.
  215. // This is useful whenever all the needed functionality has been
  216. // implemented directly inside the lexer semantic actions, which are being
  217. // executed while the tokens are matched.
  218. //
  219. // The function takes a pair of iterators spanning the underlying input
  220. // stream to scan, the lexer object (built from the token definitions),
  221. // and an (optional) functor being called for each of the generated tokens.
  222. //
  223. // The function returns true if the scanning of the input succeeded (the
  224. // given input sequence has been successfully matched by the given token
  225. // definitions).
  226. //
  227. // first, last: The pair of iterators spanning the underlying input
  228. // sequence to parse. These iterators must at least
  229. // conform to the requirements of the std::input_iterator
  230. // category.
  231. // On exit the iterator 'first' will be updated to the
  232. // position right after the last successfully matched
  233. // token.
  234. // lex: The lexer object (encoding the token definitions) to be
  235. // used to convert the input sequence into a sequence of
  236. // tokens. The LexerExpr type must conform to the
  237. // lexer interface described in the corresponding section
  238. // of the documentation.
  239. // f: A functor (callable object) taking a single argument of
  240. // the token type and returning a bool, indicating whether
  241. // the tokenization should be canceled.
  242. // initial_state: The name of the state the lexer should start matching.
  243. // The default value is zero, causing the lexer to start
  244. // in its 'INITIAL' state.
  245. //
  246. ///////////////////////////////////////////////////////////////////////////
  247. namespace detail
  248. {
  249. template <typename Token, typename F>
  250. bool tokenize_callback(Token const& t, F f)
  251. {
  252. return f(t);
  253. }
  254. template <typename Token, typename Eval>
  255. bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
  256. {
  257. f(t);
  258. return true;
  259. }
  260. template <typename Token>
  261. bool tokenize_callback(Token const& t, void (*f)(Token const&))
  262. {
  263. f(t);
  264. return true;
  265. }
  266. template <typename Token>
  267. bool tokenize_callback(Token const& t, bool (*f)(Token const&))
  268. {
  269. return f(t);
  270. }
  271. }
  272. template <typename Iterator, typename Lexer, typename F>
  273. inline bool
  274. tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
  275. , typename Lexer::char_type const* initial_state = 0)
  276. {
  277. typedef typename Lexer::iterator_type iterator_type;
  278. iterator_type iter = lex.begin(first, last, initial_state);
  279. iterator_type end = lex.end();
  280. for (/**/; iter != end && token_is_valid(*iter); ++iter)
  281. {
  282. if (!detail::tokenize_callback(*iter, f))
  283. return false;
  284. }
  285. return (iter == end) ? true : false;
  286. }
  287. ///////////////////////////////////////////////////////////////////////////
  288. template <typename Iterator, typename Lexer>
  289. inline bool
  290. tokenize(Iterator& first, Iterator last, Lexer const& lex
  291. , typename Lexer::char_type const* initial_state = 0)
  292. {
  293. typedef typename Lexer::iterator_type iterator_type;
  294. iterator_type iter = lex.begin(first, last, initial_state);
  295. iterator_type end = lex.end();
  296. while (iter != end && token_is_valid(*iter))
  297. ++iter;
  298. return (iter == end) ? true : false;
  299. }
  300. }}}
  301. #endif