123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529 |
- // input.hpp
- // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
- #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
- #include "char_traits.hpp"
- #include "size_t.hpp"
- #include "state_machine.hpp"
- #include <iterator> // for std::iterator_traits
- namespace boost
- {
- namespace lexer
- {
- template<typename FwdIter, typename Traits =
- char_traits<typename std::iterator_traits<FwdIter>::value_type> >
- class basic_input
- {
- public:
- class iterator
- {
- public:
- friend class basic_input;
- struct data
- {
- std::size_t id;
- std::size_t unique_id;
- FwdIter start;
- FwdIter end;
- bool bol;
- std::size_t state;
- // Construct in end() state.
- data () :
- id (0),
- unique_id (npos),
- bol (false),
- state (npos)
- {
- }
- bool operator == (const data &rhs_) const
- {
- return id == rhs_.id && unique_id == rhs_.unique_id &&
- start == rhs_.start && end == rhs_.end &&
- bol == rhs_.bol && state == rhs_.state;
- }
- };
- iterator () :
- _input (0)
- {
- }
- bool operator == (const iterator &rhs_) const
- {
- return _data == rhs_._data;
- }
- bool operator != (const iterator &rhs_) const
- {
- return !(*this == rhs_);
- }
- data &operator * ()
- {
- return _data;
- }
- data *operator -> ()
- {
- return &_data;
- }
- // Let compiler generate operator = ().
- // prefix version
- iterator &operator ++ ()
- {
- next_token ();
- return *this;
- }
- // postfix version
- iterator operator ++ (int)
- {
- iterator iter_ = *this;
- next_token ();
- return iter_;
- }
- private:
- // Not owner (obviously!)
- const basic_input *_input;
- data _data;
- void next_token ()
- {
- const detail::internals &internals_ =
- _input->_state_machine->data ();
- _data.start = _data.end;
- if (internals_._dfa->size () == 1)
- {
- if (internals_._seen_BOL_assertion ||
- internals_._seen_EOL_assertion)
- {
- _data.id = next
- (&internals_._lookup->front ()->front (),
- internals_._dfa_alphabet.front (),
- &internals_._dfa->front ()->front (),
- _data.bol, _data.end, _input->_end, _data.unique_id);
- }
- else
- {
- _data.id = next (&internals_._lookup->front ()->front (),
- internals_._dfa_alphabet.front (), &internals_.
- _dfa->front ()->front (), _data.end, _input->_end,
- _data.unique_id);
- }
- }
- else
- {
- if (internals_._seen_BOL_assertion ||
- internals_._seen_EOL_assertion)
- {
- _data.id = next (internals_, _data.state,
- _data.bol, _data.end, _input->_end, _data.unique_id);
- }
- else
- {
- _data.id = next (internals_, _data.state,
- _data.end, _input->_end, _data.unique_id);
- }
- }
- if (_data.end == _input->_end && _data.start == _data.end)
- {
- // Ensure current state matches that returned by end().
- _data.state = npos;
- }
- }
- std::size_t next (const detail::internals &internals_,
- std::size_t &start_state_, bool bol_,
- FwdIter &start_token_, const FwdIter &end_,
- std::size_t &unique_id_)
- {
- if (start_token_ == end_)
- {
- unique_id_ = npos;
- return 0;
- }
- again:
- const std::size_t * lookup_ = &internals_._lookup[start_state_]->
- front ();
- std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
- const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- FwdIter curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- std::size_t uid_ = *(ptr_ + unique_id_index);
- std::size_t end_start_state_ = start_state_;
- bool end_bol_ = bol_;
- FwdIter end_token_ = start_token_;
- while (curr_ != end_)
- {
- const std::size_t BOL_state_ = ptr_[bol_index];
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (BOL_state_ && bol_)
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- typename Traits::char_type prev_char_ = *curr_++;
- bol_ = prev_char_ == '\n';
- const std::size_t state_ =
- ptr_[lookup_[static_cast<typename Traits::index_type>
- (prev_char_)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_start_state_ = *(ptr_ + state_index);
- end_bol_ = bol_;
- end_token_ = curr_;
- }
- }
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_start_state_ = *(ptr_ + state_index);
- end_bol_ = bol_;
- end_token_ = curr_;
- }
- }
- if (end_state_)
- {
- // return longest match
- start_state_ = end_start_state_;
- start_token_ = end_token_;
- if (id_ == 0)
- {
- bol_ = end_bol_;
- goto again;
- }
- else
- {
- _data.bol = end_bol_;
- }
- }
- else
- {
- // No match causes char to be skipped
- _data.bol = *start_token_ == '\n';
- ++start_token_;
- id_ = npos;
- uid_ = npos;
- }
- unique_id_ = uid_;
- return id_;
- }
- std::size_t next (const detail::internals &internals_,
- std::size_t &start_state_, FwdIter &start_token_,
- FwdIter const &end_, std::size_t &unique_id_)
- {
- if (start_token_ == end_)
- {
- unique_id_ = npos;
- return 0;
- }
- again:
- const std::size_t * lookup_ = &internals_._lookup[start_state_]->
- front ();
- std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
- const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- FwdIter curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- std::size_t uid_ = *(ptr_ + unique_id_index);
- std::size_t end_start_state_ = start_state_;
- FwdIter end_token_ = start_token_;
- while (curr_ != end_)
- {
- const std::size_t state_ = ptr_[lookup_[static_cast
- <typename Traits::index_type>(*curr_++)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_start_state_ = *(ptr_ + state_index);
- end_token_ = curr_;
- }
- }
- if (end_state_)
- {
- // return longest match
- start_state_ = end_start_state_;
- start_token_ = end_token_;
- if (id_ == 0) goto again;
- }
- else
- {
- // No match causes char to be skipped
- ++start_token_;
- id_ = npos;
- uid_ = npos;
- }
- unique_id_ = uid_;
- return id_;
- }
- std::size_t next (const std::size_t * const lookup_,
- const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- bool bol_, FwdIter &start_token_, FwdIter const &end_,
- std::size_t &unique_id_)
- {
- if (start_token_ == end_)
- {
- unique_id_ = npos;
- return 0;
- }
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- FwdIter curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- std::size_t uid_ = *(ptr_ + unique_id_index);
- bool end_bol_ = bol_;
- FwdIter end_token_ = start_token_;
- while (curr_ != end_)
- {
- const std::size_t BOL_state_ = ptr_[bol_index];
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (BOL_state_ && bol_)
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- typename Traits::char_type prev_char_ = *curr_++;
- bol_ = prev_char_ == '\n';
- const std::size_t state_ =
- ptr_[lookup_[static_cast<typename Traits::index_type>
- (prev_char_)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_bol_ = bol_;
- end_token_ = curr_;
- }
- }
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_bol_ = bol_;
- end_token_ = curr_;
- }
- }
- if (end_state_)
- {
- // return longest match
- _data.bol = end_bol_;
- start_token_ = end_token_;
- }
- else
- {
- // No match causes char to be skipped
- _data.bol = *start_token_ == '\n';
- ++start_token_;
- id_ = npos;
- uid_ = npos;
- }
- unique_id_ = uid_;
- return id_;
- }
- std::size_t next (const std::size_t * const lookup_,
- const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- FwdIter &start_token_, FwdIter const &end_,
- std::size_t &unique_id_)
- {
- if (start_token_ == end_)
- {
- unique_id_ = npos;
- return 0;
- }
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- FwdIter curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- std::size_t uid_ = *(ptr_ + unique_id_index);
- FwdIter end_token_ = start_token_;
- while (curr_ != end_)
- {
- const std::size_t state_ = ptr_[lookup_[static_cast
- <typename Traits::index_type>(*curr_++)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_token_ = curr_;
- }
- }
- if (end_state_)
- {
- // return longest match
- start_token_ = end_token_;
- }
- else
- {
- // No match causes char to be skipped
- ++start_token_;
- id_ = npos;
- uid_ = npos;
- }
- unique_id_ = uid_;
- return id_;
- }
- };
- friend class iterator;
- // Make it explicit that we are NOT taking a copy of state_machine_!
- basic_input (const basic_state_machine<typename Traits::char_type>
- *state_machine_, const FwdIter &begin_, const FwdIter &end_) :
- _state_machine (state_machine_),
- _begin (begin_),
- _end (end_)
- {
- }
- iterator begin () const
- {
- iterator iter_;
- iter_._input = this;
- // Over-ride default of 0 (EOI)
- iter_._data.id = npos;
- iter_._data.start = _begin;
- iter_._data.end = _begin;
- iter_._data.bol = _state_machine->data ()._seen_BOL_assertion;
- iter_._data.state = 0;
- ++iter_;
- return iter_;
- }
- iterator end () const
- {
- iterator iter_;
- iter_._input = this;
- iter_._data.start = _end;
- iter_._data.end = _end;
- return iter_;
- }
- private:
- const basic_state_machine<typename Traits::char_type> *_state_machine;
- FwdIter _begin;
- FwdIter _end;
- };
- typedef basic_input<std::string::iterator> iter_input;
- typedef basic_input<std::basic_string<wchar_t>::iterator> iter_winput;
- typedef basic_input<const char *> ptr_input;
- typedef basic_input<const wchar_t *> ptr_winput;
- }
- }
- #endif
|