generate_static.hpp 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. // Copyright (c) 2008-2009 Ben Hanson
  2. // Copyright (c) 2008-2011 Hartmut Kaiser
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
  7. #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
  8. #if defined(_MSC_VER)
  9. #pragma once
  10. #endif
  11. #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
  12. #include <boost/spirit/home/support/detail/lexer/consts.hpp>
  13. #include <boost/spirit/home/support/detail/lexer/rules.hpp>
  14. #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
  15. #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
  16. #include <boost/spirit/home/support/detail/lexer/debug.hpp>
  17. #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
  18. #include <boost/scoped_array.hpp>
  19. #include <cstring>
  20. #include <locale>
  21. ///////////////////////////////////////////////////////////////////////////////
  22. namespace boost { namespace spirit { namespace lex { namespace lexertl
  23. {
  24. namespace detail
  25. {
  26. ///////////////////////////////////////////////////////////////////////////
  27. template <typename CharT>
  28. struct string_lit;
  29. template <>
  30. struct string_lit<char>
  31. {
  32. static char get(char c) { return c; }
  33. static std::string get(char const* str = "") { return str; }
  34. };
  35. template <>
  36. struct string_lit<wchar_t>
  37. {
  38. static wchar_t get(char c)
  39. {
  40. typedef std::ctype<wchar_t> ctype_t;
  41. return std::use_facet<ctype_t>(std::locale()).widen(c);
  42. }
  43. static std::basic_string<wchar_t> get(char const* source = "")
  44. {
  45. using namespace std; // some systems have size_t in ns std
  46. size_t len = strlen(source);
  47. boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
  48. result.get()[len] = '\0';
  49. // working with wide character streams is supported only if the
  50. // platform provides the std::ctype<wchar_t> facet
  51. BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
  52. std::use_facet<std::ctype<wchar_t> >(std::locale())
  53. .widen(source, source + len, result.get());
  54. return result.get();
  55. }
  56. };
  57. template <typename Char>
  58. inline Char L(char c)
  59. {
  60. return string_lit<Char>::get(c);
  61. }
  62. template <typename Char>
  63. inline std::basic_string<Char> L(char const* c = "")
  64. {
  65. return string_lit<Char>::get(c);
  66. }
  67. ///////////////////////////////////////////////////////////////////////////
  68. template <typename Char>
  69. inline bool
  70. generate_delimiter(std::basic_ostream<Char> &os_)
  71. {
  72. os_ << std::basic_string<Char>(80, '/') << "\n";
  73. return os_.good();
  74. }
  75. ///////////////////////////////////////////////////////////////////////////
  76. // Generate a table of the names of the used lexer states, which is a bit
  77. // tricky, because the table stored with the rules is sorted based on the
  78. // names, but we need it sorted using the state ids.
  79. template <typename Char>
  80. inline bool
  81. generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
  82. , std::basic_ostream<Char> &os_, Char const* name_suffix)
  83. {
  84. // we need to re-sort the state names in ascending order of the state
  85. // ids, filling possible gaps in between later
  86. typedef typename
  87. boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
  88. state_iterator;
  89. typedef std::map<std::size_t, Char const*> reverse_state_map_type;
  90. reverse_state_map_type reverse_state_map;
  91. state_iterator send = rules_.statemap().end();
  92. for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
  93. {
  94. typedef typename reverse_state_map_type::value_type value_type;
  95. reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
  96. }
  97. generate_delimiter(os_);
  98. os_ << "// this table defines the names of the lexer states\n";
  99. os_ << boost::lexer::detail::strings<Char>::char_name()
  100. << " const* const lexer_state_names"
  101. << (name_suffix[0] ? "_" : "") << name_suffix
  102. << "[" << rules_.statemap().size() << "] = \n{\n";
  103. typedef typename reverse_state_map_type::iterator iterator;
  104. iterator rend = reverse_state_map.end();
  105. std::size_t last_id = 0;
  106. for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
  107. {
  108. for (/**/; last_id < (*rit).first; ++last_id)
  109. {
  110. os_ << " 0, // \"<undefined state>\"\n";
  111. }
  112. os_ << " "
  113. << boost::lexer::detail::strings<Char>::char_prefix()
  114. << "\"" << (*rit).second << "\"";
  115. if (++rit != rend)
  116. os_ << ",\n";
  117. else
  118. os_ << "\n"; // don't generate the final comma
  119. }
  120. os_ << "};\n\n";
  121. generate_delimiter(os_);
  122. os_ << "// this variable defines the number of lexer states\n";
  123. os_ << "std::size_t const lexer_state_count"
  124. << (name_suffix[0] ? "_" : "") << name_suffix
  125. << " = " << rules_.statemap().size() << ";\n\n";
  126. return os_.good();
  127. }
  128. template <typename Char>
  129. inline bool
  130. generate_cpp_state_table (std::basic_ostream<Char> &os_
  131. , Char const* name_suffix, bool bol, bool eol)
  132. {
  133. std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
  134. suffix += name_suffix;
  135. generate_delimiter(os_);
  136. os_ << "// this defines a generic accessors for the information above\n";
  137. os_ << "struct lexer" << suffix << "\n{\n";
  138. os_ << " // version number and feature-set of compatible static lexer engine\n";
  139. os_ << " enum\n";
  140. os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n";
  141. os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
  142. os_ << " supports_eol = " << std::boolalpha << eol << "\n";
  143. os_ << " };\n\n";
  144. os_ << " // return the number of lexer states\n";
  145. os_ << " static std::size_t state_count()\n";
  146. os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
  147. os_ << " // return the name of the lexer state as given by 'idx'\n";
  148. os_ << " static " << boost::lexer::detail::strings<Char>::char_name()
  149. << " const* state_name(std::size_t idx)\n";
  150. os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
  151. os_ << " // return the next matched token\n";
  152. os_ << " template<typename Iterator>\n";
  153. os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
  154. os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
  155. os_ << " {\n return next_token" << suffix
  156. << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
  157. os_ << "};\n\n";
  158. return os_.good();
  159. }
  160. ///////////////////////////////////////////////////////////////////////////
  161. // generate function body based on traversing the DFA tables
  162. template <typename Char>
  163. bool generate_function_body_dfa(std::basic_ostream<Char>& os_
  164. , boost::lexer::basic_state_machine<Char> const &sm_)
  165. {
  166. std::size_t const dfas_ = sm_.data()._dfa->size();
  167. std::size_t const lookups_ = sm_.data()._lookup->front()->size();
  168. os_ << " enum {end_state_index, id_index, unique_id_index, "
  169. "state_index, bol_index,\n";
  170. os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
  171. os_ << " static std::size_t const npos = "
  172. "static_cast<std::size_t>(~0);\n";
  173. if (dfas_ > 1)
  174. {
  175. for (std::size_t state_ = 0; state_ < dfas_; ++state_)
  176. {
  177. std::size_t i_ = 0;
  178. std::size_t j_ = 1;
  179. std::size_t count_ = lookups_ / 8;
  180. std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
  181. std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
  182. os_ << " static std::size_t const lookup" << state_
  183. << "_[" << lookups_ << "] = {\n ";
  184. for (/**/; i_ < count_; ++i_)
  185. {
  186. std::size_t const index_ = i_ * 8;
  187. os_ << lookup_[index_];
  188. for (/**/; j_ < 8; ++j_)
  189. {
  190. os_ << ", " << lookup_[index_ + j_];
  191. }
  192. if (i_ < count_ - 1)
  193. {
  194. os_ << ",\n ";
  195. }
  196. j_ = 1;
  197. }
  198. os_ << " };\n";
  199. count_ = sm_.data()._dfa[state_]->size ();
  200. os_ << " static const std::size_t dfa" << state_ << "_["
  201. << count_ << "] = {\n ";
  202. count_ /= 8;
  203. for (i_ = 0; i_ < count_; ++i_)
  204. {
  205. std::size_t const index_ = i_ * 8;
  206. os_ << dfa_[index_];
  207. for (j_ = 1; j_ < 8; ++j_)
  208. {
  209. os_ << ", " << dfa_[index_ + j_];
  210. }
  211. if (i_ < count_ - 1)
  212. {
  213. os_ << ",\n ";
  214. }
  215. }
  216. std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
  217. if (mod_)
  218. {
  219. std::size_t const index_ = count_ * 8;
  220. if (count_)
  221. {
  222. os_ << ",\n ";
  223. }
  224. os_ << dfa_[index_];
  225. for (j_ = 1; j_ < mod_; ++j_)
  226. {
  227. os_ << ", " << dfa_[index_ + j_];
  228. }
  229. }
  230. os_ << " };\n";
  231. }
  232. std::size_t count_ = sm_.data()._dfa_alphabet.size();
  233. std::size_t i_ = 1;
  234. os_ << " static std::size_t const* lookup_arr_[" << count_
  235. << "] = { lookup0_";
  236. for (i_ = 1; i_ < count_; ++i_)
  237. {
  238. os_ << ", " << "lookup" << i_ << "_";
  239. }
  240. os_ << " };\n";
  241. os_ << " static std::size_t const dfa_alphabet_arr_["
  242. << count_ << "] = { ";
  243. os_ << sm_.data()._dfa_alphabet.front ();
  244. for (i_ = 1; i_ < count_; ++i_)
  245. {
  246. os_ << ", " << sm_.data()._dfa_alphabet[i_];
  247. }
  248. os_ << " };\n";
  249. os_ << " static std::size_t const* dfa_arr_[" << count_
  250. << "] = { ";
  251. os_ << "dfa0_";
  252. for (i_ = 1; i_ < count_; ++i_)
  253. {
  254. os_ << ", " << "dfa" << i_ << "_";
  255. }
  256. os_ << " };\n";
  257. }
  258. else
  259. {
  260. std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
  261. std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
  262. std::size_t i_ = 0;
  263. std::size_t j_ = 1;
  264. std::size_t count_ = lookups_ / 8;
  265. os_ << " static std::size_t const lookup_[";
  266. os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
  267. for (/**/; i_ < count_; ++i_)
  268. {
  269. const std::size_t index_ = i_ * 8;
  270. os_ << lookup_[index_];
  271. for (/**/; j_ < 8; ++j_)
  272. {
  273. os_ << ", " << lookup_[index_ + j_];
  274. }
  275. if (i_ < count_ - 1)
  276. {
  277. os_ << ",\n ";
  278. }
  279. j_ = 1;
  280. }
  281. os_ << " };\n";
  282. os_ << " static std::size_t const dfa_alphabet_ = "
  283. << sm_.data()._dfa_alphabet.front () << ";\n";
  284. os_ << " static std::size_t const dfa_["
  285. << sm_.data()._dfa[0]->size () << "] = {\n ";
  286. count_ = sm_.data()._dfa[0]->size () / 8;
  287. for (i_ = 0; i_ < count_; ++i_)
  288. {
  289. const std::size_t index_ = i_ * 8;
  290. os_ << dfa_[index_];
  291. for (j_ = 1; j_ < 8; ++j_)
  292. {
  293. os_ << ", " << dfa_[index_ + j_];
  294. }
  295. if (i_ < count_ - 1)
  296. {
  297. os_ << ",\n ";
  298. }
  299. }
  300. const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
  301. if (mod_)
  302. {
  303. const std::size_t index_ = count_ * 8;
  304. if (count_)
  305. {
  306. os_ << ",\n ";
  307. }
  308. os_ << dfa_[index_];
  309. for (j_ = 1; j_ < mod_; ++j_)
  310. {
  311. os_ << ", " << dfa_[index_ + j_];
  312. }
  313. }
  314. os_ << " };\n";
  315. }
  316. os_ << "\n if (start_token_ == end_)\n";
  317. os_ << " {\n";
  318. os_ << " unique_id_ = npos;\n";
  319. os_ << " return 0;\n";
  320. os_ << " }\n\n";
  321. if (sm_.data()._seen_BOL_assertion)
  322. {
  323. os_ << " bool bol = bol_;\n\n";
  324. }
  325. if (dfas_ > 1)
  326. {
  327. os_ << "again:\n";
  328. os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
  329. os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
  330. os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
  331. }
  332. os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
  333. os_ << " Iterator curr_ = start_token_;\n";
  334. os_ << " bool end_state_ = *ptr_ != 0;\n";
  335. os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
  336. os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
  337. if (dfas_ > 1)
  338. {
  339. os_ << " std::size_t end_start_state_ = start_state_;\n";
  340. }
  341. if (sm_.data()._seen_BOL_assertion)
  342. {
  343. os_ << " bool end_bol_ = bol_;\n";
  344. }
  345. os_ << " Iterator end_token_ = start_token_;\n\n";
  346. os_ << " while (curr_ != end_)\n";
  347. os_ << " {\n";
  348. if (sm_.data()._seen_BOL_assertion)
  349. {
  350. os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
  351. }
  352. if (sm_.data()._seen_EOL_assertion)
  353. {
  354. os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
  355. }
  356. if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
  357. {
  358. os_ << " if (BOL_state_ && bol)\n";
  359. os_ << " {\n";
  360. os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
  361. os_ << " }\n";
  362. os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
  363. os_ << " {\n";
  364. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
  365. os_ << " }\n";
  366. os_ << " else\n";
  367. os_ << " {\n";
  368. if (lookups_ == 256)
  369. {
  370. os_ << " unsigned char index = \n";
  371. os_ << " static_cast<unsigned char>(*curr_++);\n";
  372. }
  373. else
  374. {
  375. os_ << " std::size_t index = *curr_++\n";
  376. }
  377. os_ << " bol = (index == '\\n') ? true : false;\n";
  378. os_ << " std::size_t const state_ = ptr_[\n";
  379. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  380. os_ << '\n';
  381. os_ << " if (state_ == 0) break;\n";
  382. os_ << '\n';
  383. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  384. os_ << " }\n\n";
  385. }
  386. else if (sm_.data()._seen_BOL_assertion)
  387. {
  388. os_ << " if (BOL_state_ && bol)\n";
  389. os_ << " {\n";
  390. os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
  391. os_ << " }\n";
  392. os_ << " else\n";
  393. os_ << " {\n";
  394. if (lookups_ == 256)
  395. {
  396. os_ << " unsigned char index = \n";
  397. os_ << " static_cast<unsigned char>(*curr_++);\n";
  398. }
  399. else
  400. {
  401. os_ << " std::size_t index = *curr_++\n";
  402. }
  403. os_ << " bol = (index == '\\n') ? true : false;\n";
  404. os_ << " std::size_t const state_ = ptr_[\n";
  405. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  406. os_ << '\n';
  407. os_ << " if (state_ == 0) break;\n";
  408. os_ << '\n';
  409. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  410. os_ << " }\n\n";
  411. }
  412. else if (sm_.data()._seen_EOL_assertion)
  413. {
  414. os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
  415. os_ << " {\n";
  416. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
  417. os_ << " }\n";
  418. os_ << " else\n";
  419. os_ << " {\n";
  420. if (lookups_ == 256)
  421. {
  422. os_ << " unsigned char index = \n";
  423. os_ << " static_cast<unsigned char>(*curr_++);\n";
  424. }
  425. else
  426. {
  427. os_ << " std::size_t index = *curr_++\n";
  428. }
  429. os_ << " bol = (index == '\\n') ? true : false;\n";
  430. os_ << " std::size_t const state_ = ptr_[\n";
  431. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  432. os_ << '\n';
  433. os_ << " if (state_ == 0) break;\n";
  434. os_ << '\n';
  435. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  436. os_ << " }\n\n";
  437. }
  438. else
  439. {
  440. os_ << " std::size_t const state_ =\n";
  441. if (lookups_ == 256)
  442. {
  443. os_ << " ptr_[lookup_["
  444. "static_cast<unsigned char>(*curr_++)]];\n";
  445. }
  446. else
  447. {
  448. os_ << " ptr_[lookup_[*curr_++]];\n";
  449. }
  450. os_ << '\n';
  451. os_ << " if (state_ == 0) break;\n";
  452. os_ << '\n';
  453. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
  454. }
  455. os_ << " if (*ptr_)\n";
  456. os_ << " {\n";
  457. os_ << " end_state_ = true;\n";
  458. os_ << " id_ = *(ptr_ + id_index);\n";
  459. os_ << " uid_ = *(ptr_ + unique_id_index);\n";
  460. if (dfas_ > 1)
  461. {
  462. os_ << " end_start_state_ = *(ptr_ + state_index);\n";
  463. }
  464. if (sm_.data()._seen_BOL_assertion)
  465. {
  466. os_ << " end_bol_ = bol;\n";
  467. }
  468. os_ << " end_token_ = curr_;\n";
  469. os_ << " }\n";
  470. os_ << " }\n\n";
  471. if (sm_.data()._seen_EOL_assertion)
  472. {
  473. os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
  474. os_ << " if (EOL_state_ && curr_ == end_)\n";
  475. os_ << " {\n";
  476. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
  477. os_ << " if (*ptr_)\n";
  478. os_ << " {\n";
  479. os_ << " end_state_ = true;\n";
  480. os_ << " id_ = *(ptr_ + id_index);\n";
  481. os_ << " uid_ = *(ptr_ + unique_id_index);\n";
  482. if (dfas_ > 1)
  483. {
  484. os_ << " end_start_state_ = *(ptr_ + state_index);\n";
  485. }
  486. if (sm_.data()._seen_BOL_assertion)
  487. {
  488. os_ << " end_bol_ = bol;\n";
  489. }
  490. os_ << " end_token_ = curr_;\n";
  491. os_ << " }\n";
  492. os_ << " }\n\n";
  493. }
  494. os_ << " if (end_state_)\n";
  495. os_ << " {\n";
  496. os_ << " // return longest match\n";
  497. os_ << " start_token_ = end_token_;\n";
  498. if (dfas_ > 1)
  499. {
  500. os_ << " start_state_ = end_start_state_;\n";
  501. os_ << " if (id_ == 0)\n";
  502. os_ << " {\n";
  503. if (sm_.data()._seen_BOL_assertion)
  504. {
  505. os_ << " bol = end_bol_;\n";
  506. }
  507. os_ << " goto again;\n";
  508. os_ << " }\n";
  509. if (sm_.data()._seen_BOL_assertion)
  510. {
  511. os_ << " else\n";
  512. os_ << " {\n";
  513. os_ << " bol_ = end_bol_;\n";
  514. os_ << " }\n";
  515. }
  516. }
  517. else if (sm_.data()._seen_BOL_assertion)
  518. {
  519. os_ << " bol_ = end_bol_;\n";
  520. }
  521. os_ << " }\n";
  522. os_ << " else\n";
  523. os_ << " {\n";
  524. if (sm_.data()._seen_BOL_assertion)
  525. {
  526. os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
  527. }
  528. os_ << " id_ = npos;\n";
  529. os_ << " uid_ = npos;\n";
  530. os_ << " }\n\n";
  531. os_ << " unique_id_ = uid_;\n";
  532. os_ << " return id_;\n";
  533. return os_.good();
  534. }
  535. ///////////////////////////////////////////////////////////////////////////
  536. template <typename Char>
  537. inline std::basic_string<Char> get_charlit(Char ch)
  538. {
  539. std::basic_string<Char> result;
  540. boost::lexer::basic_string_token<Char>::escape_char(ch, result);
  541. return result;
  542. }
  543. // check whether state0_0 is referenced from any of the other states
  544. template <typename Char>
  545. bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
  546. {
  547. typedef typename boost::lexer::basic_state_machine<Char>::iterator
  548. iterator_type;
  549. iterator_type iter_ = sm_.begin();
  550. std::size_t const states_ = iter_->states;
  551. for (std::size_t state_ = 0; state_ < states_; ++state_)
  552. {
  553. if (0 == iter_->bol_index || 0 == iter_->eol_index)
  554. {
  555. return true;
  556. }
  557. std::size_t const transitions_ = iter_->transitions;
  558. for (std::size_t t_ = 0; t_ < transitions_; ++t_)
  559. {
  560. if (0 == iter_->goto_state)
  561. {
  562. return true;
  563. }
  564. ++iter_;
  565. }
  566. if (transitions_ == 0) ++iter_;
  567. }
  568. return false;
  569. }
  570. ///////////////////////////////////////////////////////////////////////////
  571. template <typename Char>
  572. bool generate_function_body_switch(std::basic_ostream<Char> & os_
  573. , boost::lexer::basic_state_machine<Char> const &sm_)
  574. {
  575. typedef typename boost::lexer::basic_state_machine<Char>::iterator
  576. iterator_type;
  577. std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
  578. iterator_type iter_ = sm_.begin();
  579. iterator_type labeliter_ = iter_;
  580. iterator_type end_ = sm_.end();
  581. std::size_t const dfas_ = sm_.data()._dfa->size ();
  582. os_ << " static std::size_t const npos = "
  583. "static_cast<std::size_t>(~0);\n";
  584. os_ << "\n if (start_token_ == end_)\n";
  585. os_ << " {\n";
  586. os_ << " unique_id_ = npos;\n";
  587. os_ << " return 0;\n";
  588. os_ << " }\n\n";
  589. if (sm_.data()._seen_BOL_assertion)
  590. {
  591. os_ << " bool bol = bol_;\n";
  592. }
  593. if (dfas_ > 1)
  594. {
  595. os_ << "again:\n";
  596. }
  597. os_ << " Iterator curr_ = start_token_;\n";
  598. os_ << " bool end_state_ = false;\n";
  599. os_ << " std::size_t id_ = npos;\n";
  600. os_ << " std::size_t uid_ = npos;\n";
  601. if (dfas_ > 1)
  602. {
  603. os_ << " std::size_t end_start_state_ = start_state_;\n";
  604. }
  605. if (sm_.data()._seen_BOL_assertion)
  606. {
  607. os_ << " bool end_bol_ = bol_;\n";
  608. }
  609. os_ << " Iterator end_token_ = start_token_;\n";
  610. os_ << '\n';
  611. os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
  612. << " ch_ = 0;\n\n";
  613. if (dfas_ > 1)
  614. {
  615. os_ << " switch (start_state_)\n";
  616. os_ << " {\n";
  617. for (std::size_t i_ = 0; i_ < dfas_; ++i_)
  618. {
  619. os_ << " case " << i_ << ":\n";
  620. os_ << " goto state" << i_ << "_0;\n";
  621. os_ << " break;\n";
  622. }
  623. os_ << " default:\n";
  624. os_ << " goto end;\n";
  625. os_ << " break;\n";
  626. os_ << " }\n";
  627. }
  628. bool need_state0_0_label = need_label0_0(sm_);
  629. for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
  630. {
  631. std::size_t const states_ = iter_->states;
  632. for (std::size_t state_ = 0; state_ < states_; ++state_)
  633. {
  634. std::size_t const transitions_ = iter_->transitions;
  635. std::size_t t_ = 0;
  636. if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
  637. {
  638. os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
  639. }
  640. if (iter_->end_state)
  641. {
  642. os_ << " end_state_ = true;\n";
  643. os_ << " id_ = " << iter_->id << ";\n";
  644. os_ << " uid_ = " << iter_->unique_id << ";\n";
  645. os_ << " end_token_ = curr_;\n";
  646. if (dfas_ > 1)
  647. {
  648. os_ << " end_start_state_ = " << iter_->goto_dfa <<
  649. ";\n";
  650. }
  651. if (sm_.data()._seen_BOL_assertion)
  652. {
  653. os_ << " end_bol_ = bol;\n";
  654. }
  655. if (transitions_) os_ << '\n';
  656. }
  657. if (t_ < transitions_ ||
  658. iter_->bol_index != boost::lexer::npos ||
  659. iter_->eol_index != boost::lexer::npos)
  660. {
  661. os_ << " if (curr_ == end_) goto end;\n";
  662. os_ << " ch_ = *curr_;\n";
  663. if (iter_->bol_index != boost::lexer::npos)
  664. {
  665. os_ << "\n if (bol) goto state" << dfa_ << '_'
  666. << iter_->bol_index << ";\n";
  667. }
  668. if (iter_->eol_index != boost::lexer::npos)
  669. {
  670. os_ << "\n if (ch_ == '\\n') goto state" << dfa_
  671. << '_' << iter_->eol_index << ";\n";
  672. }
  673. os_ << " ++curr_;\n";
  674. }
  675. for (/**/; t_ < transitions_; ++t_)
  676. {
  677. Char const *ptr_ = iter_->token._charset.c_str();
  678. Char const *end2_ = ptr_ + iter_->token._charset.size();
  679. Char start_char_ = 0;
  680. Char curr_char_ = 0;
  681. bool range_ = false;
  682. bool first_char_ = true;
  683. os_ << "\n if (";
  684. while (ptr_ != end2_)
  685. {
  686. curr_char_ = *ptr_++;
  687. if (*ptr_ == curr_char_ + 1)
  688. {
  689. if (!range_)
  690. {
  691. start_char_ = curr_char_;
  692. }
  693. range_ = true;
  694. }
  695. else
  696. {
  697. if (!first_char_)
  698. {
  699. os_ << ((iter_->token._negated) ? " && " : " || ");
  700. }
  701. else
  702. {
  703. first_char_ = false;
  704. }
  705. if (range_)
  706. {
  707. if (iter_->token._negated)
  708. {
  709. os_ << "!";
  710. }
  711. os_ << "(ch_ >= '" << get_charlit(start_char_)
  712. << "' && ch_ <= '"
  713. << get_charlit(curr_char_) << "')";
  714. range_ = false;
  715. }
  716. else
  717. {
  718. os_ << "ch_ "
  719. << ((iter_->token._negated) ? "!=" : "==")
  720. << " '" << get_charlit(curr_char_) << "'";
  721. }
  722. }
  723. }
  724. os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
  725. << ";\n";
  726. ++iter_;
  727. }
  728. if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
  729. {
  730. os_ << " goto end;\n";
  731. }
  732. if (transitions_ == 0) ++iter_;
  733. }
  734. }
  735. os_ << "\nend:\n";
  736. os_ << " if (end_state_)\n";
  737. os_ << " {\n";
  738. os_ << " // return longest match\n";
  739. os_ << " start_token_ = end_token_;\n";
  740. if (dfas_ > 1)
  741. {
  742. os_ << " start_state_ = end_start_state_;\n";
  743. os_ << "\n if (id_ == 0)\n";
  744. os_ << " {\n";
  745. if (sm_.data()._seen_BOL_assertion)
  746. {
  747. os_ << " bol = end_bol_;\n";
  748. }
  749. os_ << " goto again;\n";
  750. os_ << " }\n";
  751. if (sm_.data()._seen_BOL_assertion)
  752. {
  753. os_ << " else\n";
  754. os_ << " {\n";
  755. os_ << " bol_ = end_bol_;\n";
  756. os_ << " }\n";
  757. }
  758. }
  759. else if (sm_.data()._seen_BOL_assertion)
  760. {
  761. os_ << " bol_ = end_bol_;\n";
  762. }
  763. os_ << " }\n";
  764. os_ << " else\n";
  765. os_ << " {\n";
  766. if (sm_.data()._seen_BOL_assertion)
  767. {
  768. os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
  769. }
  770. os_ << " id_ = npos;\n";
  771. os_ << " uid_ = npos;\n";
  772. os_ << " }\n\n";
  773. os_ << " unique_id_ = uid_;\n";
  774. os_ << " return id_;\n";
  775. return os_.good();
  776. }
  777. ///////////////////////////////////////////////////////////////////////////
  778. // Generate a tokenizer for the given state machine.
  779. template <typename Char, typename F>
  780. inline bool
  781. generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
  782. , boost::lexer::basic_rules<Char> const& rules_
  783. , std::basic_ostream<Char> &os_, Char const* name_suffix
  784. , F generate_function_body)
  785. {
  786. if (sm_.data()._lookup->empty())
  787. return false;
  788. std::size_t const dfas_ = sm_.data()._dfa->size();
  789. // std::size_t const lookups_ = sm_.data()._lookup->front()->size();
  790. os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
  791. os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
  792. os_ << "//\n";
  793. os_ << "// Distributed under the Boost Software License, "
  794. "Version 1.0. (See accompanying\n";
  795. os_ << "// file licence_1_0.txt or copy at "
  796. "http://www.boost.org/LICENSE_1_0.txt)\n\n";
  797. os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
  798. std::basic_string<Char> guard(name_suffix);
  799. guard += L<Char>(name_suffix[0] ? "_" : "");
  800. guard += L<Char>(__DATE__ "_" __TIME__);
  801. typename std::basic_string<Char>::size_type p =
  802. guard.find_first_of(L<Char>(": "));
  803. while (std::string::npos != p)
  804. {
  805. guard.replace(p, 1, L<Char>("_"));
  806. p = guard.find_first_of(L<Char>(": "), p);
  807. }
  808. { // to_upper(guard)
  809. typedef std::ctype<Char> facet_t;
  810. facet_t const& facet = std::use_facet<facet_t>(std::locale());
  811. typedef typename std::basic_string<Char>::iterator iter_t;
  812. for (iter_t iter = guard.begin(),
  813. last = guard.end(); iter != last; ++iter)
  814. *iter = facet.toupper(*iter);
  815. }
  816. os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
  817. os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
  818. os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
  819. generate_delimiter(os_);
  820. os_ << "// the generated table of state names and the tokenizer have to be\n"
  821. "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
  822. os_ << "namespace boost { namespace spirit { namespace lex { "
  823. "namespace lexertl { namespace static_ {\n\n";
  824. // generate the lexer state information variables
  825. if (!generate_cpp_state_info(rules_, os_, name_suffix))
  826. return false;
  827. generate_delimiter(os_);
  828. os_ << "// this function returns the next matched token\n";
  829. os_ << "template<typename Iterator>\n";
  830. os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
  831. << name_suffix << " (";
  832. if (dfas_ > 1)
  833. {
  834. os_ << "std::size_t& start_state_, ";
  835. }
  836. else
  837. {
  838. os_ << "std::size_t& /*start_state_*/, ";
  839. }
  840. if (sm_.data()._seen_BOL_assertion)
  841. {
  842. os_ << "bool& bol_, ";
  843. }
  844. else
  845. {
  846. os_ << "bool& /*bol_*/, ";
  847. }
  848. os_ << "\n ";
  849. os_ << "Iterator &start_token_, Iterator const& end_, ";
  850. os_ << "std::size_t& unique_id_)\n";
  851. os_ << "{\n";
  852. if (!generate_function_body(os_, sm_))
  853. return false;
  854. os_ << "}\n\n";
  855. if (!generate_cpp_state_table<Char>(os_, name_suffix
  856. , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
  857. {
  858. return false;
  859. }
  860. os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
  861. os_ << "#endif\n";
  862. return os_.good();
  863. }
  864. } // namespace detail
  865. ///////////////////////////////////////////////////////////////////////////
  866. template <typename Lexer, typename F>
  867. inline bool
  868. generate_static(Lexer const& lexer
  869. , std::basic_ostream<typename Lexer::char_type>& os
  870. , typename Lexer::char_type const* name_suffix, F f)
  871. {
  872. if (!lexer.init_dfa(true)) // always minimize DFA for static lexers
  873. return false;
  874. return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
  875. , name_suffix, f);
  876. }
  877. ///////////////////////////////////////////////////////////////////////////
  878. // deprecated function, will be removed in the future (this has been
  879. // replaced by the function generate_static_dfa - see below).
  880. template <typename Lexer>
  881. inline bool
  882. generate_static(Lexer const& lexer
  883. , std::basic_ostream<typename Lexer::char_type>& os
  884. , typename Lexer::char_type const* name_suffix =
  885. detail::L<typename Lexer::char_type>())
  886. {
  887. return generate_static(lexer, os, name_suffix
  888. , &detail::generate_function_body_dfa<typename Lexer::char_type>);
  889. }
  890. ///////////////////////////////////////////////////////////////////////////
  891. template <typename Lexer>
  892. inline bool
  893. generate_static_dfa(Lexer const& lexer
  894. , std::basic_ostream<typename Lexer::char_type>& os
  895. , typename Lexer::char_type const* name_suffix =
  896. detail::L<typename Lexer::char_type>())
  897. {
  898. return generate_static(lexer, os, name_suffix
  899. , &detail::generate_function_body_dfa<typename Lexer::char_type>);
  900. }
  901. ///////////////////////////////////////////////////////////////////////////
  902. template <typename Lexer>
  903. inline bool
  904. generate_static_switch(Lexer const& lexer
  905. , std::basic_ostream<typename Lexer::char_type>& os
  906. , typename Lexer::char_type const* name_suffix =
  907. detail::L<typename Lexer::char_type>())
  908. {
  909. return generate_static(lexer, os, name_suffix
  910. , &detail::generate_function_body_switch<typename Lexer::char_type>);
  911. }
  912. ///////////////////////////////////////////////////////////////////////////////
  913. }}}}
  914. #endif