123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432 |
- /*=============================================================================
- Boost.Wave: A Standard compliant C++ preprocessor library
- Re2C based C++ lexer
- http://www.boost.org/
- Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
- Software License, Version 1.0. (See accompanying file
- LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- =============================================================================*/
- #if !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
- #define BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
- #include <string>
- #include <cstdio>
- #include <cstdarg>
- #if defined(BOOST_SPIRIT_DEBUG)
- #include <iostream>
- #endif // defined(BOOST_SPIRIT_DEBUG)
- #include <boost/concept_check.hpp>
- #include <boost/assert.hpp>
- #include <boost/wave/wave_config.hpp>
- #include <boost/wave/language_support.hpp>
- #include <boost/wave/token_ids.hpp>
- #include <boost/wave/util/file_position.hpp>
- #include <boost/wave/cpplexer/validate_universal_char.hpp>
- #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
- #include <boost/wave/cpplexer/token_cache.hpp>
- #include <boost/wave/cpplexer/convert_trigraphs.hpp>
- #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
- #include <boost/wave/cpplexer/re2clex/scanner.hpp>
- #include <boost/wave/cpplexer/re2clex/cpp_re.hpp>
- #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
- #include <boost/wave/cpplexer/detect_include_guards.hpp>
- #endif
- #include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp>
- // this must occur after all of the includes and before any code appears
- #ifdef BOOST_HAS_ABI_HEADERS
- #include BOOST_ABI_PREFIX
- #endif
- ///////////////////////////////////////////////////////////////////////////////
- namespace boost {
- namespace wave {
- namespace cpplexer {
- namespace re2clex {
- ///////////////////////////////////////////////////////////////////////////////
- //
- // encapsulation of the re2c based cpp lexer
- //
- ///////////////////////////////////////////////////////////////////////////////
- template <typename IteratorT,
- typename PositionT = boost::wave::util::file_position_type,
- typename TokenT = lex_token<PositionT> >
- class lexer
- {
- public:
- typedef TokenT token_type;
- typedef typename token_type::string_type string_type;
- lexer(IteratorT const &first, IteratorT const &last,
- PositionT const &pos, boost::wave::language_support language_);
- ~lexer();
- token_type& get(token_type&);
- void set_position(PositionT const &pos)
- {
- // set position has to change the file name and line number only
- filename = pos.get_file();
- scanner.line = pos.get_line();
- // scanner.column = scanner.curr_column = pos.get_column();
- scanner.file_name = filename.c_str();
- }
- #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
- bool has_include_guards(std::string& guard_name) const
- {
- return guards.detected(guard_name);
- }
- #endif
- // error reporting from the re2c generated lexer
- static int report_error(Scanner<IteratorT> const* s, int code, char const *, ...);
- private:
- static char const *tok_names[];
- Scanner<IteratorT> scanner;
- string_type filename;
- string_type value;
- bool at_eof;
- boost::wave::language_support language;
- #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
- include_guards<token_type> guards;
- #endif
- #if BOOST_WAVE_SUPPORT_THREADING == 0
- static token_cache<string_type> const cache;
- #else
- token_cache<string_type> const cache;
- #endif
- };
- ///////////////////////////////////////////////////////////////////////////////
- // initialize cpp lexer
- template <typename IteratorT, typename PositionT, typename TokenT>
- inline
- lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first,
- IteratorT const &last, PositionT const &pos,
- boost::wave::language_support language_)
- : scanner(first, last),
- filename(pos.get_file()), at_eof(false), language(language_)
- #if BOOST_WAVE_SUPPORT_THREADING != 0
- , cache()
- #endif
- {
- using namespace std; // some systems have memset in std
- scanner.line = pos.get_line();
- scanner.column = scanner.curr_column = pos.get_column();
- scanner.error_proc = report_error;
- scanner.file_name = filename.c_str();
- #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
- scanner.enable_ms_extensions = true;
- #else
- scanner.enable_ms_extensions = false;
- #endif
- #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
- scanner.act_in_c99_mode = boost::wave::need_c99(language_);
- #endif
- #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
- scanner.enable_import_keyword = !boost::wave::need_c99(language_);
- #else
- scanner.enable_import_keyword = false;
- #endif
- scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_);
- scanner.single_line_only = boost::wave::need_single_line(language_);
- #if BOOST_WAVE_SUPPORT_CPP0X != 0
- scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_);
- #else
- scanner.act_in_cpp0x_mode = false;
- #endif
- #if BOOST_WAVE_SUPPORT_CPP2A != 0
- scanner.act_in_cpp2a_mode = boost::wave::need_cpp2a(language_);
- scanner.act_in_cpp0x_mode = boost::wave::need_cpp2a(language_)
- || boost::wave::need_cpp0x(language_);
- #else
- scanner.act_in_cpp2a_mode = false;
- #endif
- }
- template <typename IteratorT, typename PositionT, typename TokenT>
- inline
- lexer<IteratorT, PositionT, TokenT>::~lexer()
- {
- using namespace std; // some systems have free in std
- free(scanner.bot);
- }
- ///////////////////////////////////////////////////////////////////////////////
- // get the next token from the input stream
- template <typename IteratorT, typename PositionT, typename TokenT>
- inline TokenT&
- lexer<IteratorT, PositionT, TokenT>::get(TokenT& result)
- {
- if (at_eof)
- return result = token_type(); // return T_EOI
- std::size_t actline = scanner.line;
- token_id id = token_id(scan(&scanner));
- switch (id) {
- case T_IDENTIFIER:
- // test identifier characters for validity (throws if invalid chars found)
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- if (!boost::wave::need_no_character_validation(language))
- impl::validate_identifier_name(value, actline, scanner.column, filename);
- break;
- case T_STRINGLIT:
- case T_CHARLIT:
- case T_RAWSTRINGLIT:
- // test literal characters for validity (throws if invalid chars found)
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- if (boost::wave::need_convert_trigraphs(language))
- value = impl::convert_trigraphs(value);
- if (!boost::wave::need_no_character_validation(language))
- impl::validate_literal(value, actline, scanner.column, filename);
- break;
- case T_PP_HHEADER:
- case T_PP_QHEADER:
- case T_PP_INCLUDE:
- // convert to the corresponding ..._next token, if appropriate
- {
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
- // Skip '#' and whitespace and see whether we find an 'include_next' here.
- typename string_type::size_type start = value.find("include");
- if (value.compare(start, 12, "include_next", 12) == 0)
- id = token_id(id | AltTokenType);
- #endif
- break;
- }
- case T_LONGINTLIT: // supported in C++11, C99 and long_long mode
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- if (!boost::wave::need_long_long(language)) {
- // syntax error: not allowed in C++ mode
- BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal,
- value.c_str(), actline, scanner.column, filename.c_str());
- }
- break;
- case T_OCTALINT:
- case T_DECIMALINT:
- case T_HEXAINT:
- case T_INTLIT:
- case T_FLOATLIT:
- case T_FIXEDPOINTLIT:
- case T_CCOMMENT:
- case T_CPPCOMMENT:
- case T_SPACE:
- case T_SPACE2:
- case T_ANY:
- case T_PP_NUMBER:
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- break;
- case T_EOF:
- // T_EOF is returned as a valid token, the next call will return T_EOI,
- // i.e. the actual end of input
- at_eof = true;
- value.clear();
- break;
- case T_OR_TRIGRAPH:
- case T_XOR_TRIGRAPH:
- case T_LEFTBRACE_TRIGRAPH:
- case T_RIGHTBRACE_TRIGRAPH:
- case T_LEFTBRACKET_TRIGRAPH:
- case T_RIGHTBRACKET_TRIGRAPH:
- case T_COMPL_TRIGRAPH:
- case T_POUND_TRIGRAPH:
- if (boost::wave::need_convert_trigraphs(language)) {
- value = cache.get_token_value(BASEID_FROM_TOKEN(id));
- }
- else {
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- }
- break;
- case T_ANY_TRIGRAPH:
- if (boost::wave::need_convert_trigraphs(language)) {
- value = impl::convert_trigraph(
- string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok));
- }
- else {
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- }
- break;
- default:
- if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) ||
- IS_CATEGORY(id, UnknownTokenType))
- {
- value = string_type((char const *)scanner.tok,
- scanner.cur-scanner.tok);
- }
- else {
- value = cache.get_token_value(id);
- }
- break;
- }
- // std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl;
- // the re2c lexer reports the new line number for newline tokens
- result = token_type(id, value, PositionT(filename, actline, scanner.column));
- #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
- return guards.detect_guard(result);
- #else
- return result;
- #endif
- }
- template <typename IteratorT, typename PositionT, typename TokenT>
- inline int
- lexer<IteratorT, PositionT, TokenT>::report_error(Scanner<IteratorT> const *s, int errcode,
- char const *msg, ...)
- {
- BOOST_ASSERT(0 != s);
- BOOST_ASSERT(0 != msg);
- using namespace std; // some systems have vsnprintf in namespace std
- constexpr std::size_t bufsize = 200; // should be large enough
- char buffer[bufsize];
- va_list params;
- va_start(params, msg);
- vsnprintf(buffer, bufsize, msg, params);
- va_end(params);
- BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line,
- s->column, s->file_name);
- // BOOST_UNREACHABLE_RETURN(0);
- return 0;
- }
- ///////////////////////////////////////////////////////////////////////////////
- //
- // lex_functor
- //
- ///////////////////////////////////////////////////////////////////////////////
- template <typename IteratorT,
- typename PositionT = boost::wave::util::file_position_type,
- typename TokenT = typename lexer<IteratorT, PositionT>::token_type>
- class lex_functor
- : public lex_input_interface_generator<TokenT>
- {
- public:
- typedef TokenT token_type;
- lex_functor(IteratorT const &first, IteratorT const &last,
- PositionT const &pos, boost::wave::language_support language)
- : re2c_lexer(first, last, pos, language)
- {}
- virtual ~lex_functor() {}
- // get the next token from the input stream
- token_type& get(token_type& result) BOOST_OVERRIDE { return re2c_lexer.get(result); }
- void set_position(PositionT const &pos) BOOST_OVERRIDE { re2c_lexer.set_position(pos); }
- #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
- bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
- { return re2c_lexer.has_include_guards(guard_name); }
- #endif
- private:
- lexer<IteratorT, PositionT, TokenT> re2c_lexer;
- };
- #if BOOST_WAVE_SUPPORT_THREADING == 0
- ///////////////////////////////////////////////////////////////////////////////
- template <typename IteratorT, typename PositionT, typename TokenT>
- token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const
- lexer<IteratorT, PositionT, TokenT>::cache =
- token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>();
- #endif
- } // namespace re2clex
- ///////////////////////////////////////////////////////////////////////////////
- //
- // The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp)
- // should be defined inline, if the lex_functor shouldn't be instantiated
- // separately from the lex_iterator.
- //
- // Separate (explicit) instantiation helps to reduce compilation time.
- //
- ///////////////////////////////////////////////////////////////////////////////
// new_lexer is an inline function unless the lexer is instantiated separately
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
#else
#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
#endif
- ///////////////////////////////////////////////////////////////////////////////
- //
- // The 'new_lexer' function allows the opaque generation of a new lexer object.
- // It is coupled to the iterator type so that the lexer and iterator
- // configurations can be decoupled at compile time.
- //
- // This function is declared inside the cpp_lex_token.hpp file, which is
- // referenced by the source file calling the lexer and the source file, which
- // instantiates the lex_functor. But it is defined here, so it will be
- // instantiated only while compiling the source file, which instantiates the
- // lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
- // this file (cpp_re2c_lexer.hpp) should be included only once. This
- // decouples the lexer interface from the lexer implementation and reduces
- // compilation time.
- //
- ///////////////////////////////////////////////////////////////////////////////
- template <typename IteratorT, typename PositionT, typename TokenT>
- BOOST_WAVE_RE2C_NEW_LEXER_INLINE
- lex_input_interface<TokenT> *
- new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first,
- IteratorT const &last, PositionT const &pos,
- boost::wave::language_support language)
- {
- using re2clex::lex_functor;
- return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language);
- }
- #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
- ///////////////////////////////////////////////////////////////////////////////
- } // namespace cpplexer
- } // namespace wave
- } // namespace boost
- // the suffix header occurs after all of the code
- #ifdef BOOST_HAS_ABI_HEADERS
- #include BOOST_ABI_SUFFIX
- #endif
- #endif // !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
|