123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431 |
- // state_machine.hpp
- // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
- #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
- #include <algorithm>
- #include "conversion/char_state_machine.hpp"
- #include "consts.hpp"
- #include <deque>
- #include "internals.hpp"
- #include <map>
- #include "containers/ptr_vector.hpp"
- #include "size_t.hpp"
- #include <string>
- namespace boost
- {
- namespace lexer
- {
- template<typename CharT>
- class basic_state_machine
- {
- public:
- typedef CharT char_type;
- class iterator
- {
- public:
- friend class basic_state_machine;
- struct data
- {
- // Current iterator info
- std::size_t dfa;
- std::size_t states;
- std::size_t state;
- std::size_t transitions;
- std::size_t transition;
- // Current state info
- bool end_state;
- std::size_t id;
- std::size_t unique_id;
- std::size_t goto_dfa;
- std::size_t bol_index;
- std::size_t eol_index;
- // Current transition info
- basic_string_token<CharT> token;
- std::size_t goto_state;
- data () :
- dfa (npos),
- states (0),
- state (npos),
- transitions (0),
- transition (npos),
- end_state (false),
- id (npos),
- unique_id (npos),
- goto_dfa (npos),
- bol_index (npos),
- eol_index (npos),
- goto_state (npos)
- {
- }
- bool operator == (const data &rhs_) const
- {
- return dfa == rhs_.dfa &&
- states == rhs_.states &&
- state == rhs_.state &&
- transitions == rhs_.transitions &&
- transition == rhs_.transition &&
- end_state == rhs_.end_state &&
- id == rhs_.id &&
- unique_id == rhs_.unique_id &&
- goto_dfa == rhs_.goto_dfa &&
- bol_index == rhs_.bol_index &&
- eol_index == rhs_.eol_index &&
- token == rhs_.token &&
- goto_state == rhs_.goto_state;
- }
- };
- iterator () :
- _sm (0),
- _dfas (0),
- _dfa (npos),
- _states (0),
- _state (npos),
- _transitions (0),
- _transition (npos)
- {
- }
- bool operator == (const iterator &rhs_) const
- {
- return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
- _states == rhs_._states && _state == rhs_._state &&
- _transitions == rhs_._transitions &&
- _transition == rhs_._transition;
- }
- bool operator != (const iterator &rhs_) const
- {
- return !(*this == rhs_);
- }
- data &operator * ()
- {
- return _data;
- }
- data *operator -> ()
- {
- return &_data;
- }
- // Let compiler generate operator = ().
- // prefix version
- iterator &operator ++ ()
- {
- next ();
- return *this;
- }
- // postfix version
- iterator operator ++ (int)
- {
- iterator iter_ = *this;
- next ();
- return iter_;
- }
- void clear ()
- {
- _dfas = _states = _transitions = 0;
- _dfa = _state = _transition = npos;
- }
- private:
- basic_state_machine *_sm;
- data _data;
- std::size_t _dfas;
- std::size_t _dfa;
- std::size_t _states;
- std::size_t _state;
- std::size_t _transitions;
- std::size_t _transition;
- typename detail::basic_char_state_machine<CharT>::state::
- size_t_string_token_map::const_iterator _token_iter;
- typename detail::basic_char_state_machine<CharT>::state::
- size_t_string_token_map::const_iterator _token_end;
- void next ()
- {
- bool reset_state_ = false;
- if (_transition >= _transitions)
- {
- _transition = _data.transition = 0;
- _data.state = ++_state;
- reset_state_ = true;
- if (_state >= _states)
- {
- ++_dfa;
- if (_dfa >= _dfas)
- {
- clear ();
- reset_state_ = false;
- }
- else
- {
- _states = _data.states =
- _sm->_csm._sm_vector[_dfa].size ();
- _state = _data.state = 0;
- }
- }
- }
- else
- {
- _data.transition = _transition;
- }
- if (reset_state_)
- {
- const typename detail::basic_char_state_machine<CharT>::
- state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
- _transitions = _data.transitions = ptr_->_transitions.size ();
- _data.end_state = ptr_->_end_state;
- _data.id = ptr_->_id;
- _data.unique_id = ptr_->_unique_id;
- _data.goto_dfa = ptr_->_state;
- _data.bol_index = ptr_->_bol_index;
- _data.eol_index = ptr_->_eol_index;
- _token_iter = ptr_->_transitions.begin ();
- _token_end = ptr_->_transitions.end ();
- }
- if (_token_iter != _token_end)
- {
- _data.token = _token_iter->second;
- _data.goto_state = _token_iter->first;
- ++_token_iter;
- ++_transition;
- }
- else
- {
- _data.token.clear ();
- _data.goto_state = npos;
- }
- }
- };
- friend class iterator;
- basic_state_machine ()
- {
- }
- void clear ()
- {
- _internals.clear ();
- _csm.clear ();
- }
- bool empty () const
- {
- // Don't include _csm in this test, as irrelevant to state.
- return _internals._lookup->empty () &&
- _internals._dfa_alphabet.empty () &&
- _internals._dfa->empty ();
- }
- std::size_t size () const
- {
- return _internals._dfa->size ();
- }
- bool operator == (const basic_state_machine &rhs_) const
- {
- // Don't include _csm in this test, as irrelevant to state.
- return _internals._lookup == rhs_._internals._lookup &&
- _internals._dfa_alphabet == rhs_._internals._dfa_alphabet &&
- _internals._dfa == rhs_._internals._dfa &&
- _internals._seen_BOL_assertion ==
- rhs_._internals._seen_BOL_assertion &&
- _internals._seen_EOL_assertion ==
- rhs_._internals._seen_EOL_assertion;
- }
- iterator begin () const
- {
- iterator iter_;
- iter_._sm = const_cast<basic_state_machine *>(this);
- check_for_csm ();
- if (!_csm.empty ())
- {
- const typename detail::basic_char_state_machine<CharT>::
- state_vector *ptr_ = &_csm._sm_vector.front ();
- iter_._dfas = _csm._sm_vector.size ();
- iter_._states = iter_._data.states = ptr_->size ();
- iter_._transitions = iter_._data.transitions =
- ptr_->front ()._transitions.size ();
- iter_._dfa = iter_._data.dfa = 0;
- iter_._state = iter_._data.state = 0;
- iter_._transition = 0;
- iter_._data.end_state = ptr_->front ()._end_state;
- iter_._data.id = ptr_->front ()._id;
- iter_._data.unique_id = ptr_->front ()._unique_id;
- iter_._data.goto_dfa = ptr_->front ()._state;
- iter_._data.bol_index = ptr_->front ()._bol_index;
- iter_._data.eol_index = ptr_->front ()._eol_index;
- iter_._token_iter = ptr_->front ()._transitions.begin ();
- iter_._token_end = ptr_->front ()._transitions.end ();
- // Deal with case where there is only a bol or eol
- // but no other transitions.
- if (iter_._transitions)
- {
- ++iter_;
- }
- }
- return iter_;
- }
- iterator end () const
- {
- iterator iter_;
- iter_._sm = const_cast<basic_state_machine *>(this);
- return iter_;
- }
- void swap (basic_state_machine &sm_)
- {
- _internals.swap (sm_._internals);
- _csm.swap (sm_._csm);
- }
- const detail::internals &data () const
- {
- return _internals;
- }
- private:
- detail::internals _internals;
- mutable detail::basic_char_state_machine<CharT> _csm;
- void check_for_csm () const
- {
- if (_csm.empty ())
- {
- human_readable (_csm);
- }
- }
- void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
- {
- const std::size_t max_ = sizeof (CharT) == 1 ?
- num_chars : num_wchar_ts;
- const std::size_t start_states_ = _internals._dfa->size ();
- sm_.clear ();
- sm_._sm_vector.resize (start_states_);
- for (std::size_t start_state_index_ = 0;
- start_state_index_ < start_states_; ++start_state_index_)
- {
- const detail::internals::size_t_vector *lu_ =
- _internals._lookup[start_state_index_];
- const std::size_t alphabet_ =
- _internals._dfa_alphabet[start_state_index_] - dfa_offset;
- std::vector<std::basic_string<CharT> > chars_ (alphabet_);
- const std::size_t states_ = _internals._dfa[start_state_index_]->
- size () / (alphabet_ + dfa_offset);
- const std::size_t *read_ptr_ = &_internals.
- _dfa[start_state_index_]->front () + alphabet_ + dfa_offset;
- sm_._sm_vector[start_state_index_].resize (states_ - 1);
- for (std::size_t alpha_index_ = 0; alpha_index_ < max_;
- ++alpha_index_)
- {
- const std::size_t col_ = lu_->at (alpha_index_);
- if (col_ != static_cast<std::size_t>(dead_state_index))
- {
- chars_[col_ - dfa_offset] += static_cast<CharT>
- (alpha_index_);
- }
- }
- for (std::size_t state_index_ = 1; state_index_ < states_;
- ++state_index_)
- {
- typename detail::basic_char_state_machine<CharT>::state
- *state_ = &sm_._sm_vector[start_state_index_]
- [state_index_ - 1];
- state_->_end_state = *read_ptr_ != 0;
- state_->_id = *(read_ptr_ + id_index);
- state_->_unique_id = *(read_ptr_ + unique_id_index);
- state_->_state = *(read_ptr_ + state_index);
- state_->_bol_index = *(read_ptr_ + bol_index) - 1;
- state_->_eol_index = *(read_ptr_ + eol_index) - 1;
- read_ptr_ += dfa_offset;
- for (std::size_t col_index_ = 0; col_index_ < alphabet_;
- ++col_index_, ++read_ptr_)
- {
- const std::size_t transition_ = *read_ptr_;
- if (transition_ != 0)
- {
- const std::size_t i_ = transition_ - 1;
- typename detail::basic_char_state_machine<CharT>::
- state::size_t_string_token_map::iterator iter_ =
- state_->_transitions.find (i_);
- if (iter_ == state_->_transitions.end ())
- {
- basic_string_token<CharT> token_
- (false, chars_[col_index_]);
- typename detail::basic_char_state_machine<CharT>::
- state::size_t_string_token_pair pair_
- (i_, token_);
- state_->_transitions.insert (pair_);
- }
- else
- {
- iter_->second._charset += chars_[col_index_];
- }
- }
- }
- for (typename detail::basic_char_state_machine<CharT>::state::
- size_t_string_token_map::iterator iter_ =
- state_->_transitions.begin (),
- end_ = state_->_transitions.end ();
- iter_ != end_; ++iter_)
- {
- std::sort (iter_->second._charset.begin (),
- iter_->second._charset.end ());
- iter_->second.normalise ();
- }
- }
- }
- }
- };
- typedef basic_state_machine<char> state_machine;
- typedef basic_state_machine<wchar_t> wstate_machine;
- }
- }
- #endif
|