// file_input.hpp
// Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_FILE_INPUT_HPP
- #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_FILE_INPUT_HPP
#include "char_traits.hpp"
// memcpy / memmove
#include <cstring>
#include "size_t.hpp"
#include "state_machine.hpp"
// std::basic_ifstream / std::basic_istream (read, gcount, state flags)
#include <fstream>
#include <iosfwd>
// std::vector (the read buffer)
#include <vector>
- namespace boost
- {
- namespace lexer
- {
- template<typename CharT, typename Traits = char_traits<CharT> >
- class basic_file_input
- {
- public:
- class iterator
- {
- public:
- friend class basic_file_input;
- struct data
- {
- std::size_t id;
- std::size_t unique_id;
- const CharT *start;
- const CharT *end;
- std::size_t state;
- // Construct in end() state.
- data () :
- id (0),
- unique_id (npos),
- state (npos)
- {
- }
- bool operator == (const data &rhs_) const
- {
- return id == rhs_.id && unique_id == rhs_.unique_id &&
- start == rhs_.start && end == rhs_.end &&
- state == rhs_.state;
- }
- };
- iterator () :
- _input (0)
- {
- }
- bool operator == (const iterator &rhs_) const
- {
- return _data == rhs_._data;
- }
- bool operator != (const iterator &rhs_) const
- {
- return !(*this == rhs_);
- }
- data &operator * ()
- {
- return _data;
- }
- data *operator -> ()
- {
- return &_data;
- }
- // Let compiler generate operator = ().
- // prefix version
- iterator &operator ++ ()
- {
- next_token ();
- return *this;
- }
- // postfix version
- iterator operator ++ (int)
- {
- iterator iter_ = *this;
- next_token ();
- return iter_;
- }
- void next_token ()
- {
- const detail::internals &internals_ =
- _input->_state_machine->data ();
- _data.start = _data.end;
- if (internals_._dfa->size () == 1)
- {
- _data.id = _input->next (&internals_._lookup->front ()->
- front (), internals_._dfa_alphabet.front (),
- &internals_._dfa->front ()->front (), _data.start,
- _data.end, _data.unique_id);
- }
- else
- {
- _data.id = _input->next (internals_, _data.state, _data.start,
- _data.end, _data.unique_id);
- }
- if (_data.id == 0)
- {
- _data.start = 0;
- _data.end = 0;
- // Ensure current state matches that returned by end().
- _data.state = npos;
- }
- }
- private:
- // Not owner (obviously!)
- basic_file_input *_input;
- data _data;
- };
- friend class iterator;
- // Make it explicit that we are NOT taking a copy of state_machine_!
- basic_file_input (const basic_state_machine<CharT> *state_machine_,
- std::basic_ifstream<CharT> *is_,
- const std::streamsize buffer_size_ = 4096,
- const std::streamsize buffer_increment_ = 1024) :
- _state_machine (state_machine_),
- _stream (is_),
- _buffer_size (buffer_size_),
- _buffer_increment (buffer_increment_),
- _buffer (_buffer_size, '!')
- {
- _start_buffer = &_buffer.front ();
- _end_buffer = _start_buffer + _buffer.size ();
- _start_token = _end_buffer;
- _end_token = _end_buffer;
- }
- iterator begin ()
- {
- iterator iter_;
- iter_._input = this;
- // Over-ride default of 0 (EOF)
- iter_._data.id = npos;
- iter_._data.start = 0;
- iter_._data.end = 0;
- iter_._data.state = 0;
- ++iter_;
- return iter_;
- }
- iterator end ()
- {
- iterator iter_;
- iter_._input = this;
- iter_._data.start = 0;
- iter_._data.end = 0;
- return iter_;
- }
- void flush ()
- {
- // This temporary is mandatory, otherwise the
- // pointer calculations won't work!
- const CharT *temp_ = _end_buffer;
- _start_token = _end_token = _end_buffer;
- reload_buffer (temp_, true, _end_token);
- }
- private:
- typedef std::basic_istream<CharT> istream;
- typedef std::vector<CharT> buffer;
- const basic_state_machine<CharT> *_state_machine;
- const std::streamsize _buffer_size;
- const std::streamsize _buffer_increment;
- buffer _buffer;
- CharT *_start_buffer;
- istream *_stream;
- const CharT *_start_token;
- const CharT *_end_token;
- CharT *_end_buffer;
- std::size_t next (const detail::internals &internals_,
- std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
- std::size_t &unique_id_)
- {
- _start_token = _end_token;
- again:
- const std::size_t * lookup_ = &internals_._lookup[start_state_]->
- front ();
- std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
- const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- const CharT *curr_ = _start_token;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- std::size_t uid_ = *(ptr_ + unique_id_index);
- const CharT *end_token_ = curr_;
- for (;;)
- {
- if (curr_ >= _end_buffer)
- {
- if (!reload_buffer (curr_, end_state_, end_token_))
- {
- // EOF
- break;
- }
- }
- const std::size_t BOL_state_ = ptr_[bol_index];
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (BOL_state_ && (_start_token == _start_buffer ||
- *(_start_token - 1) == '\n'))
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- const std::size_t state_ =
- ptr_[lookup_[static_cast<typename Traits::index_type>
- (*curr_++)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
- end_token_ = curr_;
- }
- }
- if (_start_token >= _end_buffer)
- {
- // No more tokens...
- unique_id_ = npos;
- return 0;
- }
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- start_state_ = *(ptr_ + state_index);
- end_token_ = curr_;
- }
- }
- if (end_state_)
- {
- // return longest match
- _end_token = end_token_;
- if (id_ == 0) goto again;
- }
- else
- {
- // No match causes char to be skipped
- _end_token = _start_token + 1;
- id_ = npos;
- uid_ = npos;
- }
- start_ = _start_token;
- end_ = _end_token;
- unique_id_ = uid_;
- return id_;
- }
- std::size_t next (const std::size_t * const lookup_,
- const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
- const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
- {
- _start_token = _end_token;
- const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
- const CharT *curr_ = _start_token;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + id_index);
- std::size_t uid_ = *(ptr_ + unique_id_index);
- const CharT *end_token_ = curr_;
- for (;;)
- {
- if (curr_ >= _end_buffer)
- {
- if (!reload_buffer (curr_, end_state_, end_token_))
- {
- // EOF
- break;
- }
- }
- const std::size_t BOL_state_ = ptr_[bol_index];
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (BOL_state_ && (_start_token == _start_buffer ||
- *(_start_token - 1) == '\n'))
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- const std::size_t state_ =
- ptr_[lookup_[static_cast<typename Traits::index_type>
- (*curr_++)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_token_ = curr_;
- }
- }
- if (_start_token >= _end_buffer)
- {
- // No more tokens...
- unique_id_ = npos;
- return 0;
- }
- const std::size_t EOL_state_ = ptr_[eol_index];
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + id_index);
- uid_ = *(ptr_ + unique_id_index);
- end_token_ = curr_;
- }
- }
- if (end_state_)
- {
- // return longest match
- _end_token = end_token_;
- }
- else
- {
- // No match causes char to be skipped
- _end_token = _start_token + 1;
- id_ = npos;
- uid_ = npos;
- }
- start_ = _start_token;
- end_ = _end_token;
- unique_id_ = uid_;
- return id_;
- }
- bool reload_buffer (const CharT * &curr_, const bool end_state_,
- const CharT * &end_token_)
- {
- bool success_ = !_stream->eof ();
- if (success_)
- {
- const CharT *old_start_token_ = _start_token;
- std::size_t old_size_ = _buffer.size ();
- std::size_t count_ = 0;
- if (_start_token - 1 == _start_buffer)
- {
- // Run out of buffer space, so increase.
- _buffer.resize (old_size_ + _buffer_increment, '!');
- _start_buffer = &_buffer.front ();
- _start_token = _start_buffer + 1;
- _stream->read (_start_buffer + old_size_,
- _buffer_increment);
- count_ = _stream->gcount ();
- _end_buffer = _start_buffer + old_size_ + count_;
- }
- else if (_start_token < _end_buffer)
- {
- const std::size_t len_ = _end_buffer - _start_token;
- // Some systems have memcpy in namespace std.
- using namespace std;
- memcpy (_start_buffer, _start_token - 1, (len_ + 1) *
- sizeof (CharT));
- _stream->read (_start_buffer + len_ + 1,
- static_cast<std::streamsize> (_buffer.size () - len_ - 1));
- count_ = _stream->gcount ();
- _start_token = _start_buffer + 1;
- _end_buffer = _start_buffer + len_ + 1 + count_;
- }
- else
- {
- _stream->read (_start_buffer, static_cast<std::streamsize>
- (_buffer.size ()));
- count_ = _stream->gcount ();
- _start_token = _start_buffer;
- _end_buffer = _start_buffer + count_;
- }
- if (end_state_)
- {
- end_token_ = _start_token +
- (end_token_ - old_start_token_);
- }
- curr_ = _start_token + (curr_ - old_start_token_);
- }
- return success_;
- }
- // Disallow copying of buffer
- basic_file_input (const basic_file_input &);
- const basic_file_input &operator = (const basic_file_input &);
- };
- typedef basic_file_input<char> file_input;
- typedef basic_file_input<wchar_t> wfile_input;
- }
- }
- #endif
|