123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651 |
- // Copyright (c) 2001-2011 Hartmut Kaiser
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #if !defined(BOOST_SPIRIT_LEX_TOKEN_FEB_10_2008_0751PM)
- #define BOOST_SPIRIT_LEX_TOKEN_FEB_10_2008_0751PM
- #if defined(_MSC_VER)
- #pragma once
- #endif
- #include <boost/config.hpp>
- #include <boost/detail/workaround.hpp>
- #include <boost/spirit/home/qi/detail/assign_to.hpp>
- #include <boost/spirit/home/support/attributes.hpp>
- #include <boost/spirit/home/support/argument.hpp>
- #include <boost/spirit/home/support/detail/lexer/generator.hpp>
- #include <boost/spirit/home/support/detail/lexer/rules.hpp>
- #include <boost/spirit/home/support/detail/lexer/consts.hpp>
- #include <boost/spirit/home/support/utree/utree_traits_fwd.hpp>
- #include <boost/spirit/home/lex/lexer/terminals.hpp>
- #include <boost/fusion/include/vector.hpp>
- #include <boost/fusion/include/at.hpp>
- #include <boost/fusion/include/value_at.hpp>
- #include <boost/variant.hpp>
- #include <boost/mpl/bool.hpp>
- #include <boost/mpl/vector.hpp>
- #include <boost/mpl/is_sequence.hpp>
- #include <boost/mpl/begin.hpp>
- #include <boost/mpl/insert.hpp>
- #include <boost/mpl/vector.hpp>
- #include <boost/mpl/if.hpp>
- #include <boost/mpl/or.hpp>
- #include <boost/range/iterator_range_core.hpp>
- #include <boost/type_traits/is_same.hpp>
- #include <boost/type_traits/make_unsigned.hpp>
- #include <boost/static_assert.hpp>
- #if defined(BOOST_SPIRIT_DEBUG)
- #include <iosfwd>
- #endif
- namespace boost { namespace spirit { namespace lex { namespace lexertl
- {
- ///////////////////////////////////////////////////////////////////////////
- //
- // The token is the type of the objects returned by default by the
- // iterator.
- //
- // template parameters:
- // Iterator The type of the iterator used to access the
- // underlying character stream.
- // AttributeTypes A mpl sequence containing the types of all
- // required different token values to be supported
- // by this token type.
- // HasState A mpl::bool_ indicating, whether this token type
- // should support lexer states.
- // Idtype The type to use for the token id (defaults to
- // std::size_t).
- //
- // It is possible to use other token types with the spirit::lex
- // framework as well. If you plan to use a different type as your token
- // type, you'll need to expose the following things from your token type
- // to make it compatible with spirit::lex:
- //
- // typedefs
- // iterator_type The type of the iterator used to access the
- // underlying character stream.
- //
- // id_type The type of the token id used.
- //
- // methods
- // default constructor
- // This should initialize the token as an end of
- // input token.
- // constructors The prototype of the other required
- // constructors should be:
- //
- // token(int)
- // This constructor should initialize the token as
- // an invalid token (not carrying any specific
- // values)
- //
- // where: the int is used as a tag only and its value is
- // ignored
- //
- // and:
- //
- // token(Idtype id, std::size_t state,
- // iterator_type first, iterator_type last);
- //
- // where: id: token id
- // state: lexer state this token was matched in
- // first, last: pair of iterators marking the matched
- // range in the underlying input stream
- //
- // accessors
- // id() return the token id of the matched input sequence
- // id(newid) set the token id of the token instance
- //
- // state() return the lexer state this token was matched in
- //
- // value() return the token value
- //
- // Additionally, you will have to implement a couple of helper functions
- // in the same namespace as the token type: a comparison operator==() to
- // compare your token instances, a token_is_valid() function and different
- // specializations of the Spirit customization point
- // assign_to_attribute_from_value as shown below.
- //
- ///////////////////////////////////////////////////////////////////////////
- template <typename Iterator = char const*
- , typename AttributeTypes = mpl::vector0<>
- , typename HasState = mpl::true_
- , typename Idtype = std::size_t>
- struct token;
- ///////////////////////////////////////////////////////////////////////////
- // This specialization of the token type doesn't contain any item data and
- // doesn't support working with lexer states.
- ///////////////////////////////////////////////////////////////////////////
- template <typename Iterator, typename Idtype>
- struct token<Iterator, lex::omit, mpl::false_, Idtype>
- {
- typedef Iterator iterator_type;
- typedef mpl::false_ has_state;
- typedef Idtype id_type;
- typedef unused_type token_value_type;
- typedef typename make_unsigned<id_type>::type uid_type;
- // default constructed tokens correspond to EOI tokens
- token() : id_(boost::lexer::npos) {}
- // construct an invalid token
- explicit token(int) : id_(0) {}
- token(id_type id, std::size_t) : id_(id) {}
- token(id_type id, std::size_t, token_value_type)
- : id_(id) {}
- token_value_type& value() { static token_value_type u; return u; }
- token_value_type const& value() const { return unused; }
- #if defined(BOOST_SPIRIT_DEBUG)
- token(id_type id, std::size_t, Iterator const& first
- , Iterator const& last)
- : matched_(first, last)
- , id_(id) {}
- #else
- token(id_type id, std::size_t, Iterator const&, Iterator const&)
- : id_(id) {}
- #endif
- // this default conversion operator is needed to allow the direct
- // usage of tokens in conjunction with the primitive parsers defined
- // in Qi
- operator id_type() const { return id_type(uid_type(id_)); }
- // Retrieve or set the token id of this token instance.
- id_type id() const { return id_type(uid_type(id_)); }
- void id(id_type newid) { id_ = uid_type(newid); }
- std::size_t state() const { return 0; } // always '0' (INITIAL state)
- bool is_valid() const
- {
- return 0 != id_ && boost::lexer::npos != id_;
- }
- #if defined(BOOST_SPIRIT_DEBUG)
- #if BOOST_WORKAROUND(BOOST_MSVC, == 1600)
- // workaround for MSVC10 which has problems copying a default
- // constructed iterator_range
- token& operator= (token const& rhs)
- {
- if (this != &rhs)
- {
- id_ = rhs.id_;
- if (is_valid())
- matched_ = rhs.matched_;
- }
- return *this;
- }
- #endif
- std::pair<Iterator, Iterator> matched_;
- #endif
- protected:
- std::size_t id_; // token id, 0 if nothing has been matched
- };
- #if defined(BOOST_SPIRIT_DEBUG)
- template <typename Char, typename Traits, typename Iterator
- , typename AttributeTypes, typename HasState, typename Idtype>
- inline std::basic_ostream<Char, Traits>&
- operator<< (std::basic_ostream<Char, Traits>& os
- , token<Iterator, AttributeTypes, HasState, Idtype> const& t)
- {
- if (t.is_valid()) {
- Iterator end = t.matched_.second;
- for (Iterator it = t.matched_.first; it != end; ++it)
- os << *it;
- }
- else {
- os << "<invalid token>";
- }
- return os;
- }
- #endif
- ///////////////////////////////////////////////////////////////////////////
- // This specialization of the token type doesn't contain any item data but
- // supports working with lexer states.
- ///////////////////////////////////////////////////////////////////////////
- template <typename Iterator, typename Idtype>
- struct token<Iterator, lex::omit, mpl::true_, Idtype>
- : token<Iterator, lex::omit, mpl::false_, Idtype>
- {
- private:
- typedef token<Iterator, lex::omit, mpl::false_, Idtype> base_type;
- public:
- typedef typename base_type::id_type id_type;
- typedef Iterator iterator_type;
- typedef mpl::true_ has_state;
- typedef unused_type token_value_type;
- // default constructed tokens correspond to EOI tokens
- token() : state_(boost::lexer::npos) {}
- // construct an invalid token
- explicit token(int) : base_type(0), state_(boost::lexer::npos) {}
- token(id_type id, std::size_t state)
- : base_type(id, boost::lexer::npos), state_(state) {}
- token(id_type id, std::size_t state, token_value_type)
- : base_type(id, boost::lexer::npos, unused)
- , state_(state) {}
- token(id_type id, std::size_t state
- , Iterator const& first, Iterator const& last)
- : base_type(id, boost::lexer::npos, first, last)
- , state_(state) {}
- std::size_t state() const { return state_; }
- #if defined(BOOST_SPIRIT_DEBUG) && BOOST_WORKAROUND(BOOST_MSVC, == 1600)
- // workaround for MSVC10 which has problems copying a default
- // constructed iterator_range
- token& operator= (token const& rhs)
- {
- if (this != &rhs)
- {
- this->base_type::operator=(static_cast<base_type const&>(rhs));
- state_ = rhs.state_;
- }
- return *this;
- }
- #endif
- protected:
- std::size_t state_; // lexer state this token was matched in
- };
- ///////////////////////////////////////////////////////////////////////////
- // The generic version of the token type derives from the
- // specialization above and adds a single data member holding the item
- // data carried by the token instance.
- ///////////////////////////////////////////////////////////////////////////
- namespace detail
- {
- ///////////////////////////////////////////////////////////////////////
- // Meta-function to calculate the type of the variant data item to be
- // stored with each token instance.
- //
- // Note: The iterator pair needs to be the first type in the list of
- // types supported by the generated variant type (this is being
- // used to identify whether the stored data item in a particular
- // token instance needs to be converted from the pair of
- // iterators (see the first of the assign_to_attribute_from_value
- // specializations below).
- ///////////////////////////////////////////////////////////////////////
- template <typename IteratorPair, typename AttributeTypes>
- struct token_value_typesequence
- {
- typedef typename mpl::insert<
- AttributeTypes
- , typename mpl::begin<AttributeTypes>::type
- , IteratorPair
- >::type sequence_type;
- typedef typename make_variant_over<sequence_type>::type type;
- };
- ///////////////////////////////////////////////////////////////////////
- // The type of the data item stored with a token instance is defined
- // by the template parameter 'AttributeTypes' and may be:
- //
- // lex::omit: no data item is stored with the token
- // instance (this is handled by the
- // specializations of the token class
- // below)
- // mpl::vector0<>: each token instance stores a pair of
- // iterators pointing to the matched input
- // sequence
- // mpl::vector<...>: each token instance stores a variant being
- // able to store the pair of iterators pointing
- // to the matched input sequence, or any of the
- // types a specified in the mpl::vector<>
- //
- // All this is done to ensure the token type is as small (in terms
- // of its byte-size) as possible.
- ///////////////////////////////////////////////////////////////////////
- template <typename IteratorPair, typename AttributeTypes>
- struct token_value_type
- : mpl::eval_if<
- mpl::or_<
- is_same<AttributeTypes, mpl::vector0<> >
- , is_same<AttributeTypes, mpl::vector<> > >
- , mpl::identity<IteratorPair>
- , token_value_typesequence<IteratorPair, AttributeTypes> >
- {};
- }
- template <typename Iterator, typename AttributeTypes, typename HasState
- , typename Idtype>
- struct token : token<Iterator, lex::omit, HasState, Idtype>
- {
- private: // precondition assertions
- BOOST_STATIC_ASSERT((mpl::is_sequence<AttributeTypes>::value ||
- is_same<AttributeTypes, lex::omit>::value));
- typedef token<Iterator, lex::omit, HasState, Idtype> base_type;
- protected:
- // If no additional token value types are given, the token will
- // hold the plain pair of iterators pointing to the matched range
- // in the underlying input sequence. Otherwise the token value is
- // stored as a variant and will again hold the pair of iterators but
- // is able to hold any of the given data types as well. The conversion
- // from the iterator pair to the required data type is done when it is
- // accessed for the first time.
- typedef iterator_range<Iterator> iterpair_type;
- public:
- typedef typename base_type::id_type id_type;
- typedef typename detail::token_value_type<
- iterpair_type, AttributeTypes
- >::type token_value_type;
- typedef Iterator iterator_type;
- // default constructed tokens correspond to EOI tokens
- token() : value_(iterpair_type(iterator_type(), iterator_type())) {}
- // construct an invalid token
- explicit token(int)
- : base_type(0)
- , value_(iterpair_type(iterator_type(), iterator_type())) {}
- token(id_type id, std::size_t state, token_value_type const& value)
- : base_type(id, state, value)
- , value_(value) {}
- token(id_type id, std::size_t state, Iterator const& first
- , Iterator const& last)
- : base_type(id, state, first, last)
- , value_(iterpair_type(first, last)) {}
- token_value_type& value() { return value_; }
- token_value_type const& value() const { return value_; }
- #if BOOST_WORKAROUND(BOOST_MSVC, == 1600)
- // workaround for MSVC10 which has problems copying a default
- // constructed iterator_range
- token& operator= (token const& rhs)
- {
- if (this != &rhs)
- {
- this->base_type::operator=(static_cast<base_type const&>(rhs));
- if (this->is_valid())
- value_ = rhs.value_;
- }
- return *this;
- }
- #endif
- protected:
- token_value_type value_; // token value, by default a pair of iterators
- };
- ///////////////////////////////////////////////////////////////////////////
- // tokens are considered equal, if their id's match (these are unique)
- template <typename Iterator, typename AttributeTypes, typename HasState
- , typename Idtype>
- inline bool
- operator== (token<Iterator, AttributeTypes, HasState, Idtype> const& lhs,
- token<Iterator, AttributeTypes, HasState, Idtype> const& rhs)
- {
- return lhs.id() == rhs.id();
- }
- ///////////////////////////////////////////////////////////////////////////
- // This overload is needed by the multi_pass/functor_input_policy to
- // validate a token instance. It has to be defined in the same namespace
- // as the token class itself to allow ADL to find it.
- ///////////////////////////////////////////////////////////////////////////
- template <typename Iterator, typename AttributeTypes, typename HasState
- , typename Idtype>
- inline bool
- token_is_valid(token<Iterator, AttributeTypes, HasState, Idtype> const& t)
- {
- return t.is_valid();
- }
- }}}}
- namespace boost { namespace spirit { namespace traits
- {
- ///////////////////////////////////////////////////////////////////////////
- // We have to provide specializations for the customization point
- // assign_to_attribute_from_value allowing to extract the needed value
- // from the token.
- ///////////////////////////////////////////////////////////////////////////
- // This is called from the parse function of token_def if the token_def
- // has been defined to carry a special attribute type
- template <typename Attribute, typename Iterator, typename AttributeTypes
- , typename HasState, typename Idtype>
- struct assign_to_attribute_from_value<Attribute
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- {
- static void
- call(lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> const& t
- , Attribute& attr)
- {
- // The goal of this function is to avoid the conversion of the pair of
- // iterators (to the matched character sequence) into the token value
- // of the required type being done more than once. For this purpose it
- // checks whether the stored value type is still the default one (pair
- // of iterators) and if yes, replaces the pair of iterators with the
- // converted value to be returned from subsequent calls.
- if (0 == t.value().which()) {
- // first access to the token value
- typedef iterator_range<Iterator> iterpair_type;
- iterpair_type const& ip = boost::get<iterpair_type>(t.value());
- // Interestingly enough we use the assign_to() framework defined in
- // Spirit.Qi allowing to convert the pair of iterators to almost any
- // required type (assign_to(), if available, uses the standard Spirit
- // parsers to do the conversion).
- spirit::traits::assign_to(ip.begin(), ip.end(), attr);
- // If you get an error during the compilation of the following
- // assignment expression, you probably forgot to list one or more
- // types used as token value types (in your token_def<...>
- // definitions) in your definition of the token class. I.e. any token
- // value type used for a token_def<...> definition has to be listed
- // during the declaration of the token type to use. For instance let's
- // assume we have two token_def's:
- //
- // token_def<int> number; number = "...";
- // token_def<std::string> identifier; identifier = "...";
- //
- // Then you'll have to use the following token type definition
- // (assuming you are using the token class):
- //
- // typedef mpl::vector<int, std::string> token_values;
- // typedef token<base_iter_type, token_values> token_type;
- //
- // where: base_iter_type is the iterator type used to expose the
- // underlying input stream.
- //
- // This token_type has to be used as the second template parameter
- // to the lexer class:
- //
- // typedef lexer<base_iter_type, token_type> lexer_type;
- //
- // again, assuming you're using the lexer<> template for your
- // tokenization.
- typedef lex::lexertl::token<
- Iterator, AttributeTypes, HasState, Idtype> token_type;
- spirit::traits::assign_to(
- attr, const_cast<token_type&>(t).value()); // re-assign value
- }
- else {
- // reuse the already assigned value
- spirit::traits::assign_to(boost::get<Attribute>(t.value()), attr);
- }
- }
- };
- template <typename Attribute, typename Iterator, typename AttributeTypes
- , typename HasState, typename Idtype>
- struct assign_to_container_from_value<Attribute
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- : assign_to_attribute_from_value<Attribute
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- {};
- template <typename Iterator, typename AttributeTypes
- , typename HasState, typename Idtype>
- struct assign_to_container_from_value<utree
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- : assign_to_attribute_from_value<utree
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- {};
- template <typename Iterator>
- struct assign_to_container_from_value<
- iterator_range<Iterator>, iterator_range<Iterator> >
- {
- static void
- call(iterator_range<Iterator> const& val, iterator_range<Iterator>& attr)
- {
- attr = val;
- }
- };
- // These are called from the parse function of token_def if the token type
- // has no special attribute type assigned
- template <typename Attribute, typename Iterator, typename HasState
- , typename Idtype>
- struct assign_to_attribute_from_value<Attribute
- , lex::lexertl::token<Iterator, mpl::vector0<>, HasState, Idtype> >
- {
- static void
- call(lex::lexertl::token<Iterator, mpl::vector0<>, HasState, Idtype> const& t
- , Attribute& attr)
- {
- // The default type returned by the token_def parser component (if
- // it has no token value type assigned) is the pair of iterators
- // to the matched character sequence.
- spirit::traits::assign_to(t.value().begin(), t.value().end(), attr);
- }
- };
- // template <typename Attribute, typename Iterator, typename HasState
- // , typename Idtype>
- // struct assign_to_container_from_value<Attribute
- // , lex::lexertl::token<Iterator, mpl::vector0<>, HasState, Idtype> >
- // : assign_to_attribute_from_value<Attribute
- // , lex::lexertl::token<Iterator, mpl::vector0<>, HasState, Idtype> >
- // {};
- // same as above but using mpl::vector<> instead of mpl::vector0<>
- template <typename Attribute, typename Iterator, typename HasState
- , typename Idtype>
- struct assign_to_attribute_from_value<Attribute
- , lex::lexertl::token<Iterator, mpl::vector<>, HasState, Idtype> >
- {
- static void
- call(lex::lexertl::token<Iterator, mpl::vector<>, HasState, Idtype> const& t
- , Attribute& attr)
- {
- // The default type returned by the token_def parser component (if
- // it has no token value type assigned) is the pair of iterators
- // to the matched character sequence.
- spirit::traits::assign_to(t.value().begin(), t.value().end(), attr);
- }
- };
- // template <typename Attribute, typename Iterator, typename HasState
- // , typename Idtype>
- // struct assign_to_container_from_value<Attribute
- // , lex::lexertl::token<Iterator, mpl::vector<>, HasState, Idtype> >
- // : assign_to_attribute_from_value<Attribute
- // , lex::lexertl::token<Iterator, mpl::vector<>, HasState, Idtype> >
- // {};
- // This is called from the parse function of token_def if the token type
- // has been explicitly omitted (i.e. no attribute value is used), which
- // essentially means that every attribute gets initialized using default
- // constructed values.
- template <typename Attribute, typename Iterator, typename HasState
- , typename Idtype>
- struct assign_to_attribute_from_value<Attribute
- , lex::lexertl::token<Iterator, lex::omit, HasState, Idtype> >
- {
- static void
- call(lex::lexertl::token<Iterator, lex::omit, HasState, Idtype> const&
- , Attribute&)
- {
- // do nothing
- }
- };
- template <typename Attribute, typename Iterator, typename HasState
- , typename Idtype>
- struct assign_to_container_from_value<Attribute
- , lex::lexertl::token<Iterator, lex::omit, HasState, Idtype> >
- : assign_to_attribute_from_value<Attribute
- , lex::lexertl::token<Iterator, lex::omit, HasState, Idtype> >
- {};
- // This is called from the parse function of lexer_def_
- template <typename Iterator, typename AttributeTypes, typename HasState
- , typename Idtype_, typename Idtype>
- struct assign_to_attribute_from_value<
- fusion::vector2<Idtype_, iterator_range<Iterator> >
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- {
- static void
- call(lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> const& t
- , fusion::vector2<Idtype_, iterator_range<Iterator> >& attr)
- {
- // The type returned by the lexer_def_ parser components is a
- // fusion::vector containing the token id of the matched token
- // and the pair of iterators to the matched character sequence.
- typedef iterator_range<Iterator> iterpair_type;
- typedef fusion::vector2<Idtype_, iterator_range<Iterator> >
- attribute_type;
- iterpair_type const& ip = boost::get<iterpair_type>(t.value());
- attr = attribute_type(t.id(), ip);
- }
- };
- template <typename Iterator, typename AttributeTypes, typename HasState
- , typename Idtype_, typename Idtype>
- struct assign_to_container_from_value<
- fusion::vector2<Idtype_, iterator_range<Iterator> >
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- : assign_to_attribute_from_value<
- fusion::vector2<Idtype_, iterator_range<Iterator> >
- , lex::lexertl::token<Iterator, AttributeTypes, HasState, Idtype> >
- {};
- ///////////////////////////////////////////////////////////////////////////
- // Overload debug output for a single token, this integrates lexer tokens
- // with Qi's simple_trace debug facilities
- template <typename Iterator, typename Attribute, typename HasState
- , typename Idtype>
- struct token_printer_debug<
- lex::lexertl::token<Iterator, Attribute, HasState, Idtype> >
- {
- typedef lex::lexertl::token<Iterator, Attribute, HasState, Idtype> token_type;
- template <typename Out>
- static void print(Out& out, token_type const& val)
- {
- out << '[';
- spirit::traits::print_token(out, val.value());
- out << ']';
- }
- };
- }}}
- #endif
|