123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407 |
// string_token.hpp
// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STRING_TOKEN_HPP
- #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STRING_TOKEN_HPP
- #include <algorithm>
- #include "size_t.hpp"
- #include "consts.hpp" // num_chars, num_wchar_ts
- #include <sstream>
- #include <string>
- #include <limits>
- namespace boost
- {
- namespace lexer
- {
- template<typename CharT>
- struct basic_string_token
- {
- typedef std::basic_string<CharT> string;
- bool _negated;
- string _charset;
- basic_string_token () :
- _negated (false)
- {
- }
- basic_string_token (const bool negated_, const string &charset_) :
- _negated (negated_),
- _charset (charset_)
- {
- }
- void remove_duplicates ()
- {
- const CharT *start_ = _charset.c_str ();
- const CharT *end_ = start_ + _charset.size ();
- // Optimisation for very large charsets:
- // sorting via pointers is much quicker than
- // via iterators...
- std::sort (const_cast<CharT *> (start_), const_cast<CharT *> (end_));
- _charset.erase (std::unique (_charset.begin (), _charset.end ()),
- _charset.end ());
- }
- void normalise ()
- {
- const std::size_t max_chars_ = sizeof (CharT) == 1 ?
- num_chars : num_wchar_ts;
- if (_charset.length () == max_chars_)
- {
- _negated = !_negated;
- _charset.clear ();
- }
- else if (_charset.length () > max_chars_ / 2)
- {
- negate ();
- }
- }
- void negate ()
- {
- const std::size_t max_chars_ = sizeof (CharT) == 1 ?
- num_chars : num_wchar_ts;
- CharT curr_char_ = (std::numeric_limits<CharT>::min)();
- string temp_;
- const CharT *curr_ = _charset.c_str ();
- const CharT *chars_end_ = curr_ + _charset.size ();
- _negated = !_negated;
- temp_.resize (max_chars_ - _charset.size ());
- CharT *ptr_ = const_cast<CharT *> (temp_.c_str ());
- std::size_t i_ = 0;
- while (curr_ < chars_end_)
- {
- while (*curr_ > curr_char_)
- {
- *ptr_ = curr_char_;
- ++ptr_;
- ++curr_char_;
- ++i_;
- }
- ++curr_char_;
- ++curr_;
- ++i_;
- }
- for (; i_ < max_chars_; ++i_)
- {
- *ptr_ = curr_char_;
- ++ptr_;
- ++curr_char_;
- }
- _charset = temp_;
- }
- bool operator < (const basic_string_token &rhs_) const
- {
- return _negated < rhs_._negated ||
- (_negated == rhs_._negated && _charset < rhs_._charset);
- }
- bool empty () const
- {
- return _charset.empty () && !_negated;
- }
- bool any () const
- {
- return _charset.empty () && _negated;
- }
- void clear ()
- {
- _negated = false;
- _charset.clear ();
- }
- void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
- {
- if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
- !any () && !rhs_.any ()))
- {
- intersect_same_types (rhs_, overlap_);
- }
- else
- {
- intersect_diff_types (rhs_, overlap_);
- }
- }
- static void escape_char (const CharT ch_, string &out_)
- {
- switch (ch_)
- {
- case '\0':
- out_ += '\\';
- out_ += '0';
- break;
- case '\a':
- out_ += '\\';
- out_ += 'a';
- break;
- case '\b':
- out_ += '\\';
- out_ += 'b';
- break;
- case 27:
- out_ += '\\';
- out_ += 'x';
- out_ += '1';
- out_ += 'b';
- break;
- case '\f':
- out_ += '\\';
- out_ += 'f';
- break;
- case '\n':
- out_ += '\\';
- out_ += 'n';
- break;
- case '\r':
- out_ += '\\';
- out_ += 'r';
- break;
- case '\t':
- out_ += '\\';
- out_ += 't';
- break;
- case '\v':
- out_ += '\\';
- out_ += 'v';
- break;
- case '\\':
- out_ += '\\';
- out_ += '\\';
- break;
- case '"':
- out_ += '\\';
- out_ += '"';
- break;
- case '\'':
- out_ += '\\';
- out_ += '\'';
- break;
- default:
- {
- if (ch_ < 32 && ch_ >= 0)
- {
- std::basic_stringstream<CharT> ss_;
- out_ += '\\';
- out_ += 'x';
- ss_ << std::hex <<
- static_cast<std::size_t> (ch_);
- out_ += ss_.str ();
- }
- else
- {
- out_ += ch_;
- }
- break;
- }
- }
- }
- private:
- void intersect_same_types (basic_string_token &rhs_,
- basic_string_token &overlap_)
- {
- if (any ())
- {
- clear ();
- overlap_._negated = true;
- rhs_.clear ();
- }
- else
- {
- typename string::iterator iter_ = _charset.begin ();
- typename string::iterator end_ = _charset.end ();
- typename string::iterator rhs_iter_ = rhs_._charset.begin ();
- typename string::iterator rhs_end_ = rhs_._charset.end ();
- overlap_._negated = _negated;
- while (iter_ != end_ && rhs_iter_ != rhs_end_)
- {
- if (*iter_ < *rhs_iter_)
- {
- ++iter_;
- }
- else if (*iter_ > *rhs_iter_)
- {
- ++rhs_iter_;
- }
- else
- {
- overlap_._charset += *iter_;
- iter_ = _charset.erase (iter_);
- end_ = _charset.end ();
- rhs_iter_ = rhs_._charset.erase (rhs_iter_);
- rhs_end_ = rhs_._charset.end ();
- }
- }
- if (_negated)
- {
- // duplicates already merged, so safe to merge
- // using std lib.
- // src, dest
- merge (_charset, overlap_._charset);
- // duplicates already merged, so safe to merge
- // using std lib.
- // src, dest
- merge (rhs_._charset, overlap_._charset);
- _negated = false;
- rhs_._negated = false;
- std::swap (_charset, rhs_._charset);
- normalise ();
- overlap_.normalise ();
- rhs_.normalise ();
- }
- else if (!overlap_._charset.empty ())
- {
- normalise ();
- overlap_.normalise ();
- rhs_.normalise ();
- }
- }
- }
- void intersect_diff_types (basic_string_token &rhs_,
- basic_string_token &overlap_)
- {
- if (any ())
- {
- intersect_any (rhs_, overlap_);
- }
- else if (_negated)
- {
- intersect_negated (rhs_, overlap_);
- }
- else // _negated == false
- {
- intersect_charset (rhs_, overlap_);
- }
- }
- void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_)
- {
- if (rhs_._negated)
- {
- rhs_.intersect_negated (*this, overlap_);
- }
- else // rhs._negated == false
- {
- rhs_.intersect_charset (*this, overlap_);
- }
- }
- void intersect_negated (basic_string_token &rhs_,
- basic_string_token &overlap_)
- {
- if (rhs_.any ())
- {
- overlap_._negated = true;
- overlap_._charset = _charset;
- rhs_._negated = false;
- rhs_._charset = _charset;
- clear ();
- }
- else // rhs._negated == false
- {
- rhs_.intersect_charset (*this, overlap_);
- }
- }
- void intersect_charset (basic_string_token &rhs_,
- basic_string_token &overlap_)
- {
- if (rhs_.any ())
- {
- overlap_._charset = _charset;
- rhs_._negated = true;
- rhs_._charset = _charset;
- clear ();
- }
- else // rhs_._negated == true
- {
- typename string::iterator iter_ = _charset.begin ();
- typename string::iterator end_ = _charset.end ();
- typename string::iterator rhs_iter_ = rhs_._charset.begin ();
- typename string::iterator rhs_end_ = rhs_._charset.end ();
- while (iter_ != end_ && rhs_iter_ != rhs_end_)
- {
- if (*iter_ < *rhs_iter_)
- {
- overlap_._charset += *iter_;
- rhs_iter_ = rhs_._charset.insert (rhs_iter_, *iter_);
- ++rhs_iter_;
- rhs_end_ = rhs_._charset.end ();
- iter_ = _charset.erase (iter_);
- end_ = _charset.end ();
- }
- else if (*iter_ > *rhs_iter_)
- {
- ++rhs_iter_;
- }
- else
- {
- ++iter_;
- ++rhs_iter_;
- }
- }
- if (iter_ != end_)
- {
- // nothing bigger in rhs_ than iter_,
- // so safe to merge using std lib.
- string temp_ (iter_, end_);
- // src, dest
- merge (temp_, overlap_._charset);
- _charset.erase (iter_, end_);
- }
- if (!overlap_._charset.empty ())
- {
- merge (overlap_._charset, rhs_._charset);
- // possible duplicates, so check for any and erase.
- rhs_._charset.erase (std::unique (rhs_._charset.begin (),
- rhs_._charset.end ()), rhs_._charset.end ());
- normalise ();
- overlap_.normalise ();
- rhs_.normalise ();
- }
- }
- }
- void merge (string &src_, string &dest_)
- {
- string tmp_ (src_.size () + dest_.size (), 0);
- std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (),
- tmp_.begin ());
- dest_ = tmp_;
- }
- };
- }
- }
- #endif
|