123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576 |
- // generate_cpp.hpp
- // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_CPP_HPP
- #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_CPP_HPP
#include "char_traits.hpp"
#include "consts.hpp"
#include "internals.hpp"
#include "runtime_error.hpp"
#include "size_t.hpp"
#include "state_machine.hpp"
#include <cctype>
#include <cstddef>
#include <iosfwd>
#include <ostream>
#include <string>
#include <vector>
- namespace boost
- {
- namespace lexer
- {
- template<typename CharT>
- void generate_cpp (const basic_state_machine<CharT> &state_machine_,
- std::ostream &os_, const bool use_pointers_ = false,
- const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
- const char *name_ = "next_token")
- {
- const detail::internals &sm_ = state_machine_.data ();
- if (sm_._lookup->size () == 0)
- {
- throw runtime_error ("Cannot generate code from an empty "
- "state machine");
- }
- std::string upper_name_ (__DATE__);
- const std::size_t lookups_ = sm_._lookup->front ()->size ();
- const std::size_t dfas_ = sm_._dfa->size ();
- std::string::size_type pos_ = upper_name_.find (' ');
- const char *iterator_ = 0;
- if (use_pointers_)
- {
- if (lookups_ == 256)
- {
- iterator_ = "const char *";
- }
- else
- {
- iterator_ = "const wchar_t *";
- }
- }
- else
- {
- iterator_ = "Iterator &";
- }
- while (pos_ != std::string::npos)
- {
- upper_name_.replace (pos_, 1, "_");
- pos_ = upper_name_.find (' ', pos_);
- }
- upper_name_ += '_';
- upper_name_ += __TIME__;
- pos_ = upper_name_.find (':');
- while (pos_ != std::string::npos)
- {
- upper_name_.erase (pos_, 1);
- pos_ = upper_name_.find (':', pos_);
- }
- upper_name_ = '_' + upper_name_;
- upper_name_ = name_ + upper_name_;
- std::transform (upper_name_.begin (), upper_name_.end (),
- upper_name_.begin (), ::toupper);
- os_ << "#ifndef " << upper_name_ + '\n';
- os_ << "#define " << upper_name_ + '\n';
- os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
- os_ << "//\n";
- os_ << "// Distributed under the Boost Software License, "
- "Version 1.0. (See accompanying\n";
- os_ << "// file licence_1_0.txt or copy at "
- "http://www.boost.org/LICENSE_1_0.txt)\n\n";
- os_ << "// Auto-generated by boost::lexer\n";
- os_ << "template<typename Iterator>\n";
- os_ << "std::size_t " << name_ << " (";
- if (dfas_ > 1 || !optimise_parameters_)
- {
- os_ << "std::size_t &start_state_, ";
- }
- if (use_pointers_)
- {
- os_ << iterator_ << " &";
- }
- else
- {
- os_ << iterator_;
- }
- os_ << "start_token_, ";
- if (use_pointers_)
- {
- os_ << iterator_ << " const ";
- }
- else
- {
- os_ << "const " << iterator_;
- }
- os_ << "end_, \n";
- os_ << " std::size_t &unique_id_";
- if (sm_._seen_BOL_assertion || !optimise_parameters_)
- {
- os_ << ", bool &beg_of_line_";
- }
- os_ << ")\n";
- os_ << "{\n";
- os_ << " enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n";
- os_ << " eol_index, dead_state_index, dfa_offset};\n";
- os_ << " static const std::size_t npos = static_cast"
- "<std::size_t>(~0);\n";
- if (dfas_ > 1)
- {
- std::size_t state_ = 0;
- for (; state_ < dfas_; ++state_)
- {
- std::size_t i_ = 0;
- std::size_t j_ = 1;
- std::size_t count_ = lookups_ / 8;
- const std::size_t *lookup_ = &sm_._lookup[state_]->front ();
- const std::size_t *dfa_ = &sm_._dfa[state_]->front ();
- os_ << " static const std::size_t lookup" << state_ << "_[" <<
- lookups_ << "] = {";
- for (; i_ < count_; ++i_)
- {
- const std::size_t index_ = i_ * 8;
- os_ << lookup_[index_];
- for (; j_ < 8; ++j_)
- {
- os_ << ", " << lookup_[index_ + j_];
- }
- if (i_ < count_ - 1)
- {
- os_ << "," << std::endl << " ";
- }
- j_ = 1;
- }
- os_ << "};\n";
- count_ = sm_._dfa[state_]->size ();
- os_ << " static const std::size_t dfa" << state_ << "_[" <<
- count_ << "] = {";
- count_ /= 8;
- for (i_ = 0; i_ < count_; ++i_)
- {
- const std::size_t index_ = i_ * 8;
- os_ << dfa_[index_];
- for (j_ = 1; j_ < 8; ++j_)
- {
- os_ << ", " << dfa_[index_ + j_];
- }
- if (i_ < count_ - 1)
- {
- os_ << "," << std::endl << " ";
- }
- }
- const std::size_t mod_ = sm_._dfa[state_]->size () % 8;
- if (mod_)
- {
- const std::size_t index_ = count_ * 8;
- if (count_)
- {
- os_ << ",\n ";
- }
- os_ << dfa_[index_];
- for (j_ = 1; j_ < mod_; ++j_)
- {
- os_ << ", " << dfa_[index_ + j_];
- }
- }
- os_ << "};\n";
- }
- std::size_t count_ = sm_._dfa_alphabet.size ();
- std::size_t i_ = 1;
- os_ << " static const std::size_t *lookup_arr_[" << count_ <<
- "] = {";
- os_ << "lookup0_";
- for (i_ = 1; i_ < count_; ++i_)
- {
- os_ << ", " << "lookup" << i_ << "_";
- }
- os_ << "};\n";
- os_ << " static const std::size_t dfa_alphabet_arr_[" << count_ <<
- "] = {";
- os_ << sm_._dfa_alphabet.front ();
- for (i_ = 1; i_ < count_; ++i_)
- {
- os_ << ", " << sm_._dfa_alphabet[i_];
- }
- os_ << "};\n";
- os_ << " static const std::size_t *dfa_arr_[" << count_ <<
- "] = {";
- os_ << "dfa0_";
- for (i_ = 1; i_ < count_; ++i_)
- {
- os_ << ", " << "dfa" << i_ << "_";
- }
- os_ << "};\n";
- }
- else
- {
- const std::size_t *lookup_ = &sm_._lookup->front ()->front ();
- const std::size_t *dfa_ = &sm_._dfa->front ()->front ();
- std::size_t i_ = 0;
- std::size_t j_ = 1;
- std::size_t count_ = lookups_ / 8;
- os_ << " static const std::size_t lookup_[";
- os_ << sm_._lookup->front ()->size () << "] = {";
- for (; i_ < count_; ++i_)
- {
- const std::size_t index_ = i_ * 8;
- os_ << lookup_[index_];
- for (; j_ < 8; ++j_)
- {
- os_ << ", " << lookup_[index_ + j_];
- }
- if (i_ < count_ - 1)
- {
- os_ << "," << std::endl << " ";
- }
- j_ = 1;
- }
- os_ << "};\n";
- os_ << " static const std::size_t dfa_alphabet_ = " <<
- sm_._dfa_alphabet.front () << ";\n";
- os_ << " static const std::size_t dfa_[" <<
- sm_._dfa->front ()->size () << "] = {";
- count_ = sm_._dfa->front ()->size () / 8;
- for (i_ = 0; i_ < count_; ++i_)
- {
- const std::size_t index_ = i_ * 8;
- os_ << dfa_[index_];
- for (j_ = 1; j_ < 8; ++j_)
- {
- os_ << ", " << dfa_[index_ + j_];
- }
- if (i_ < count_ - 1)
- {
- os_ << "," << std::endl << " ";
- }
- }
- const std::size_t mod_ = sm_._dfa->front ()->size () % 8;
- if (mod_)
- {
- const std::size_t index_ = count_ * 8;
- if (count_)
- {
- os_ << ",\n ";
- }
- os_ << dfa_[index_];
- for (j_ = 1; j_ < mod_; ++j_)
- {
- os_ << ", " << dfa_[index_ + j_];
- }
- }
- os_ << "};\n";
- }
- os_ << "\n if (start_token_ == end_)\n";
- os_ << " {\n";
- os_ << " unique_id_ = npos;\n";
- os_ << " return 0;\n";
- os_ << " }\n\n";
- if (dfas_ > 1)
- {
- os_ << "again:\n";
- os_ << " const std::size_t * lookup_ = "
- "lookup_arr_[start_state_];\n";
- os_ << " std::size_t dfa_alphabet_ = "
- "dfa_alphabet_arr_[start_state_];\n";
- os_ << " const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
- }
- os_ << " const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
- os_ << " Iterator curr_ = start_token_;\n";
- os_ << " bool end_state_ = *ptr_ != 0;\n";
- os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
- os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
- if (dfas_ > 1)
- {
- os_ << " std::size_t end_start_state_ = start_state_;\n";
- }
- if (sm_._seen_BOL_assertion)
- {
- os_ << " bool bol_ = beg_of_line_;\n";
- os_ << " bool end_bol_ = bol_;\n";
- }
- os_ << " Iterator end_token_ = start_token_;\n";
- os_ << '\n';
- os_ << " while (curr_ != end_)\n";
- os_ << " {\n";
- if (sm_._seen_BOL_assertion)
- {
- os_ << " const std::size_t BOL_state_ = ptr_[bol_index];\n";
- }
- if (sm_._seen_EOL_assertion)
- {
- os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
- }
- if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
- {
- os_ << '\n';
- }
- if (sm_._seen_BOL_assertion)
- {
- os_ << " if (BOL_state_ && bol_)\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
- }
- if (sm_._seen_EOL_assertion)
- {
- os_ << " ";
- if (sm_._seen_BOL_assertion)
- {
- os_ << "else ";
- }
- os_ << "if (EOL_state_ && *curr_ == '\\n')\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << " }\n";
- }
- std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? " " : "");
- if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
- {
- os_ << " else\n";
- os_ << " {\n";
- }
- if (sm_._seen_BOL_assertion)
- {
- os_ << " ";
- if (lookups_ == 256)
- {
- os_ << "char";
- }
- else
- {
- os_ << "wchar_t";
- }
- os_ << " prev_char_ = *curr_++;\n\n";
- os_ << " bol_ = prev_char_ == '\\n';\n\n";
- }
- os_ << tab_;
- os_ << " const std::size_t state_ =\n";
- os_ << tab_;
- os_ << " ptr_[lookup_[";
- if (lookups_ == 256)
- {
- os_ << "static_cast<unsigned char>(";
- }
- if (sm_._seen_BOL_assertion)
- {
- os_ << "prev_char";
- }
- else
- {
- os_ << "*curr_++";
- }
- if (lookups_ == 256)
- {
- os_ << ')';
- }
- os_ << "]];\n\n";
- os_ << tab_;
- os_ << " if (state_ == 0) break;\n\n";
- os_ << tab_;
- os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
- if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
- {
- os_ << " }\n";
- }
- os_ << '\n';
- os_ << " if (*ptr_)\n";
- os_ << " {\n";
- os_ << " end_state_ = true;\n";
- os_ << " id_ = *(ptr_ + id_index);\n";
- os_ << " uid_ = *(ptr_ + unique_id_index);\n";
- if (dfas_ > 1)
- {
- os_ << " end_start_state_ = *(ptr_ + state_index);\n";
- }
- if (sm_._seen_BOL_assertion)
- {
- os_ << " end_bol_ = bol_;\n";
- }
- os_ << " end_token_ = curr_;\n";
- os_ << " }\n";
- os_ << " }\n";
- os_ << '\n';
- if (sm_._seen_EOL_assertion)
- {
- os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
- os_ << '\n';
- os_ << " if (EOL_state_ && curr_ == end_)\n";
- os_ << " {\n";
- os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
- os_ << '\n';
- os_ << " if (*ptr_)\n";
- os_ << " {\n";
- os_ << " end_state_ = true;\n";
- os_ << " id_ = *(ptr_ + id_index);\n";
- os_ << " uid_ = *(ptr_ + unique_id_index);\n";
- if (dfas_ > 1)
- {
- os_ << " end_start_state_ = *(ptr_ + state_index);\n";
- }
- if (sm_._seen_BOL_assertion)
- {
- os_ << " end_bol_ = bol_;\n";
- }
- os_ << " end_token_ = curr_;\n";
- os_ << " }\n";
- os_ << " }\n";
- os_ << '\n';
- }
- os_ << " if (end_state_)\n";
- os_ << " {\n";
- os_ << " // return longest match\n";
- if (dfas_ > 1)
- {
- os_ << " start_state_ = end_start_state_;\n";
- }
- if (sm_._seen_BOL_assertion && dfas_ < 2)
- {
- os_ << " beg_of_line_ = end_bol_;\n";
- }
- os_ << " start_token_ = end_token_;\n";
- if (dfas_ > 1)
- {
- os_ << '\n';
- os_ << " if (id_ == 0)\n";
- os_ << " {\n";
- if (sm_._seen_BOL_assertion)
- {
- os_ << " bol_ = end_bol_;\n";
- }
- os_ << " goto again;\n";
- os_ << " }\n";
- if (sm_._seen_BOL_assertion)
- {
- os_ << " else\n";
- os_ << " {\n";
- os_ << " beg_of_line_ = end_bol_;\n";
- os_ << " }\n";
- }
- }
- os_ << " }\n";
- os_ << " else\n";
- os_ << " {\n";
- if (sm_._seen_BOL_assertion)
- {
- os_ << " beg_of_line_ = *start_token_ == '\\n';\n";
- }
- if (skip_unknown_)
- {
- os_ << " // No match causes char to be skipped\n";
- os_ << " ++start_token_;\n";
- }
- os_ << " id_ = npos;\n";
- os_ << " uid_ = npos;\n";
- os_ << " }\n";
- os_ << '\n';
- os_ << " unique_id_ = uid_;\n";
- os_ << " return id_;\n";
- os_ << "}\n";
- os_ << "\n#endif\n";
- }
- }
- }
- #endif
|