basic_regex_parser.hpp 111 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE basic_regex_parser.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares template class basic_regex_parser.
  16. */
  17. #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
  18. #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
  19. #ifdef BOOST_MSVC
  20. #pragma warning(push)
  21. #pragma warning(disable: 4103)
  22. #if BOOST_MSVC >= 1800
  23. #pragma warning(disable: 26812)
  24. #endif
  25. #endif
  26. #ifdef BOOST_HAS_ABI_HEADERS
  27. # include BOOST_ABI_PREFIX
  28. #endif
  29. #ifdef BOOST_MSVC
  30. #pragma warning(pop)
  31. #endif
  32. namespace boost{
  33. namespace BOOST_REGEX_DETAIL_NS{
  34. #ifdef BOOST_MSVC
  35. #pragma warning(push)
  36. #pragma warning(disable:4244)
  37. #if BOOST_MSVC < 1910
  38. #pragma warning(disable:4800)
  39. #endif
  40. #endif
  41. inline boost::intmax_t umax(mpl::false_ const&)
  42. {
  43. // Get out clause here, just in case numeric_limits is unspecialized:
  44. return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
  45. }
  46. inline boost::intmax_t umax(mpl::true_ const&)
  47. {
  48. return (std::numeric_limits<std::size_t>::max)();
  49. }
  50. inline boost::intmax_t umax()
  51. {
  52. return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
  53. }
  54. template <class charT, class traits>
  55. class basic_regex_parser : public basic_regex_creator<charT, traits>
  56. {
  57. public:
  58. basic_regex_parser(regex_data<charT, traits>* data);
  59. void parse(const charT* p1, const charT* p2, unsigned flags);
  60. void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
  61. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
  62. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
  63. {
  64. fail(error_code, position, message, position);
  65. }
  66. bool parse_all();
  67. bool parse_basic();
  68. bool parse_extended();
  69. bool parse_literal();
  70. bool parse_open_paren();
  71. bool parse_basic_escape();
  72. bool parse_extended_escape();
  73. bool parse_match_any();
  74. bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
  75. bool parse_repeat_range(bool isbasic);
  76. bool parse_alt();
  77. bool parse_set();
  78. bool parse_backref();
  79. void parse_set_literal(basic_char_set<charT, traits>& char_set);
  80. bool parse_inner_set(basic_char_set<charT, traits>& char_set);
  81. bool parse_QE();
  82. bool parse_perl_extension();
  83. bool parse_perl_verb();
  84. bool match_verb(const char*);
  85. bool add_emacs_code(bool negate);
  86. bool unwind_alts(std::ptrdiff_t last_paren_start);
  87. digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
  88. charT unescape_character();
  89. regex_constants::syntax_option_type parse_options();
  90. private:
  91. typedef bool (basic_regex_parser::*parser_proc_type)();
  92. typedef typename traits::string_type string_type;
  93. typedef typename traits::char_class_type char_class_type;
  94. parser_proc_type m_parser_proc; // the main parser to use
  95. const charT* m_base; // the start of the string being parsed
  96. const charT* m_end; // the end of the string being parsed
  97. const charT* m_position; // our current parser position
  98. unsigned m_mark_count; // how many sub-expressions we have
  99. int m_mark_reset; // used to indicate that we're inside a (?|...) block.
  100. unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
  101. std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
  102. std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
  103. bool m_has_case_change; // true if somewhere in the current block the case has changed
  104. unsigned m_recursion_count; // How many times we've called parse_all.
  105. #if defined(BOOST_MSVC) && defined(_M_IX86)
  106. // This is an ugly warning suppression workaround (for warnings *inside* std::vector
  107. // that can not otherwise be suppressed)...
  108. BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
  109. std::vector<long> m_alt_jumps; // list of alternative in the current scope.
  110. #else
  111. std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
  112. #endif
  113. basic_regex_parser& operator=(const basic_regex_parser&);
  114. basic_regex_parser(const basic_regex_parser&);
  115. };
  116. template <class charT, class traits>
  117. basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
  118. : basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0),
  119. m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
  120. {
  121. }
  122. template <class charT, class traits>
  123. void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
  124. {
  125. // pass l_flags on to base class:
  126. this->init(l_flags);
  127. // set up pointers:
  128. m_position = m_base = p1;
  129. m_end = p2;
  130. // empty strings are errors:
  131. if((p1 == p2) &&
  132. (
  133. ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
  134. || (l_flags & regbase::no_empty_expressions)
  135. )
  136. )
  137. {
  138. fail(regex_constants::error_empty, 0);
  139. return;
  140. }
  141. // select which parser to use:
  142. switch(l_flags & regbase::main_option_type)
  143. {
  144. case regbase::perl_syntax_group:
  145. {
  146. m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
  147. //
  148. // Add a leading paren with index zero to give recursions a target:
  149. //
  150. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  151. br->index = 0;
  152. br->icase = this->flags() & regbase::icase;
  153. break;
  154. }
  155. case regbase::basic_syntax_group:
  156. m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
  157. break;
  158. case regbase::literal:
  159. m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
  160. break;
  161. default:
  162. // Oops, someone has managed to set more than one of the main option flags,
  163. // so this must be an error:
  164. fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
  165. return;
  166. }
  167. // parse all our characters:
  168. bool result = parse_all();
  169. //
  170. // Unwind our alternatives:
  171. //
  172. unwind_alts(-1);
  173. // reset l_flags as a global scope (?imsx) may have altered them:
  174. this->flags(l_flags);
  175. // if we haven't gobbled up all the characters then we must
  176. // have had an unexpected ')' :
  177. if(!result)
  178. {
  179. fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
  180. return;
  181. }
  182. // if an error has been set then give up now:
  183. if(this->m_pdata->m_status)
  184. return;
  185. // fill in our sub-expression count:
  186. this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
  187. this->finalize(p1, p2);
  188. }
  189. template <class charT, class traits>
  190. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
  191. {
  192. // get the error message:
  193. std::string message = this->m_pdata->m_ptraits->error_string(error_code);
  194. fail(error_code, position, message);
  195. }
  196. template <class charT, class traits>
  197. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
  198. {
  199. if(0 == this->m_pdata->m_status) // update the error code if not already set
  200. this->m_pdata->m_status = error_code;
  201. m_position = m_end; // don't bother parsing anything else
  202. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  203. //
  204. // Augment error message with the regular expression text:
  205. //
  206. if(start_pos == position)
  207. start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
  208. std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
  209. if(error_code != regex_constants::error_empty)
  210. {
  211. if((start_pos != 0) || (end_pos != (m_end - m_base)))
  212. message += " The error occurred while parsing the regular expression fragment: '";
  213. else
  214. message += " The error occurred while parsing the regular expression: '";
  215. if(start_pos != end_pos)
  216. {
  217. message += std::string(m_base + start_pos, m_base + position);
  218. message += ">>>HERE>>>";
  219. message += std::string(m_base + position, m_base + end_pos);
  220. }
  221. message += "'.";
  222. }
  223. #endif
  224. #ifndef BOOST_NO_EXCEPTIONS
  225. if(0 == (this->flags() & regex_constants::no_except))
  226. {
  227. boost::regex_error e(message, error_code, position);
  228. e.raise();
  229. }
  230. #else
  231. (void)position; // suppress warnings.
  232. #endif
  233. }
  234. template <class charT, class traits>
  235. bool basic_regex_parser<charT, traits>::parse_all()
  236. {
  237. if (++m_recursion_count > 400)
  238. {
  239. // exceeded internal limits
  240. fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
  241. }
  242. bool result = true;
  243. while(result && (m_position != m_end))
  244. {
  245. result = (this->*m_parser_proc)();
  246. }
  247. --m_recursion_count;
  248. return result;
  249. }
  250. #ifdef BOOST_MSVC
  251. #pragma warning(push)
  252. #pragma warning(disable:4702)
  253. #endif
  254. template <class charT, class traits>
  255. bool basic_regex_parser<charT, traits>::parse_basic()
  256. {
  257. switch(this->m_traits.syntax_type(*m_position))
  258. {
  259. case regex_constants::syntax_escape:
  260. return parse_basic_escape();
  261. case regex_constants::syntax_dot:
  262. return parse_match_any();
  263. case regex_constants::syntax_caret:
  264. ++m_position;
  265. this->append_state(syntax_element_start_line);
  266. break;
  267. case regex_constants::syntax_dollar:
  268. ++m_position;
  269. this->append_state(syntax_element_end_line);
  270. break;
  271. case regex_constants::syntax_star:
  272. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
  273. return parse_literal();
  274. else
  275. {
  276. ++m_position;
  277. return parse_repeat();
  278. }
  279. case regex_constants::syntax_plus:
  280. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  281. return parse_literal();
  282. else
  283. {
  284. ++m_position;
  285. return parse_repeat(1);
  286. }
  287. case regex_constants::syntax_question:
  288. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  289. return parse_literal();
  290. else
  291. {
  292. ++m_position;
  293. return parse_repeat(0, 1);
  294. }
  295. case regex_constants::syntax_open_set:
  296. return parse_set();
  297. case regex_constants::syntax_newline:
  298. if(this->flags() & regbase::newline_alt)
  299. return parse_alt();
  300. else
  301. return parse_literal();
  302. default:
  303. return parse_literal();
  304. }
  305. return true;
  306. }
  307. #ifdef BOOST_MSVC
  308. # pragma warning(push)
  309. #if BOOST_MSVC >= 1800
  310. #pragma warning(disable:26812)
  311. #endif
  312. #endif
  313. template <class charT, class traits>
  314. bool basic_regex_parser<charT, traits>::parse_extended()
  315. {
  316. bool result = true;
  317. switch(this->m_traits.syntax_type(*m_position))
  318. {
  319. case regex_constants::syntax_open_mark:
  320. return parse_open_paren();
  321. case regex_constants::syntax_close_mark:
  322. return false;
  323. case regex_constants::syntax_escape:
  324. return parse_extended_escape();
  325. case regex_constants::syntax_dot:
  326. return parse_match_any();
  327. case regex_constants::syntax_caret:
  328. ++m_position;
  329. this->append_state(
  330. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
  331. break;
  332. case regex_constants::syntax_dollar:
  333. ++m_position;
  334. this->append_state(
  335. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
  336. break;
  337. case regex_constants::syntax_star:
  338. if(m_position == this->m_base)
  339. {
  340. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
  341. return false;
  342. }
  343. ++m_position;
  344. return parse_repeat();
  345. case regex_constants::syntax_question:
  346. if(m_position == this->m_base)
  347. {
  348. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
  349. return false;
  350. }
  351. ++m_position;
  352. return parse_repeat(0,1);
  353. case regex_constants::syntax_plus:
  354. if(m_position == this->m_base)
  355. {
  356. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
  357. return false;
  358. }
  359. ++m_position;
  360. return parse_repeat(1);
  361. case regex_constants::syntax_open_brace:
  362. ++m_position;
  363. return parse_repeat_range(false);
  364. case regex_constants::syntax_close_brace:
  365. if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
  366. {
  367. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  368. return false;
  369. }
  370. result = parse_literal();
  371. break;
  372. case regex_constants::syntax_or:
  373. return parse_alt();
  374. case regex_constants::syntax_open_set:
  375. return parse_set();
  376. case regex_constants::syntax_newline:
  377. if(this->flags() & regbase::newline_alt)
  378. return parse_alt();
  379. else
  380. return parse_literal();
  381. case regex_constants::syntax_hash:
  382. //
  383. // If we have a mod_x flag set, then skip until
  384. // we get to a newline character:
  385. //
  386. if((this->flags()
  387. & (regbase::no_perl_ex|regbase::mod_x))
  388. == regbase::mod_x)
  389. {
  390. while((m_position != m_end) && !is_separator(*m_position++)){}
  391. return true;
  392. }
  393. BOOST_FALLTHROUGH;
  394. default:
  395. result = parse_literal();
  396. break;
  397. }
  398. return result;
  399. }
  400. #ifdef BOOST_MSVC
  401. # pragma warning(pop)
  402. #endif
  403. #ifdef BOOST_MSVC
  404. #pragma warning(pop)
  405. #endif
  406. template <class charT, class traits>
  407. bool basic_regex_parser<charT, traits>::parse_literal()
  408. {
  409. // append this as a literal provided it's not a space character
  410. // or the perl option regbase::mod_x is not set:
  411. if(
  412. ((this->flags()
  413. & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
  414. != regbase::mod_x)
  415. || !this->m_traits.isctype(*m_position, this->m_mask_space))
  416. this->append_literal(*m_position);
  417. ++m_position;
  418. return true;
  419. }
  420. template <class charT, class traits>
  421. bool basic_regex_parser<charT, traits>::parse_open_paren()
  422. {
  423. //
  424. // skip the '(' and error check:
  425. //
  426. if(++m_position == m_end)
  427. {
  428. fail(regex_constants::error_paren, m_position - m_base);
  429. return false;
  430. }
  431. //
  432. // begin by checking for a perl-style (?...) extension:
  433. //
  434. if(
  435. ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
  436. || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  437. )
  438. {
  439. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
  440. return parse_perl_extension();
  441. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
  442. return parse_perl_verb();
  443. }
  444. //
  445. // update our mark count, and append the required state:
  446. //
  447. unsigned markid = 0;
  448. if(0 == (this->flags() & regbase::nosubs))
  449. {
  450. markid = ++m_mark_count;
  451. #ifndef BOOST_NO_STD_DISTANCE
  452. if(this->flags() & regbase::save_subexpression_location)
  453. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
  454. #else
  455. if(this->flags() & regbase::save_subexpression_location)
  456. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
  457. #endif
  458. }
  459. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  460. pb->index = markid;
  461. pb->icase = this->flags() & regbase::icase;
  462. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  463. // back up insertion point for alternations, and set new point:
  464. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  465. this->m_pdata->m_data.align();
  466. m_alt_insert_point = this->m_pdata->m_data.size();
  467. //
  468. // back up the current flags in case we have a nested (?imsx) group:
  469. //
  470. regex_constants::syntax_option_type opts = this->flags();
  471. bool old_case_change = m_has_case_change;
  472. m_has_case_change = false; // no changes to this scope as yet...
  473. //
  474. // Back up branch reset data in case we have a nested (?|...)
  475. //
  476. int mark_reset = m_mark_reset;
  477. m_mark_reset = -1;
  478. //
  479. // now recursively add more states, this will terminate when we get to a
  480. // matching ')' :
  481. //
  482. parse_all();
  483. //
  484. // Unwind pushed alternatives:
  485. //
  486. if(0 == unwind_alts(last_paren_start))
  487. return false;
  488. //
  489. // restore flags:
  490. //
  491. if(m_has_case_change)
  492. {
  493. // the case has changed in one or more of the alternatives
  494. // within the scoped (...) block: we have to add a state
  495. // to reset the case sensitivity:
  496. static_cast<re_case*>(
  497. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  498. )->icase = opts & regbase::icase;
  499. }
  500. this->flags(opts);
  501. m_has_case_change = old_case_change;
  502. //
  503. // restore branch reset:
  504. //
  505. m_mark_reset = mark_reset;
  506. //
  507. // we either have a ')' or we have run out of characters prematurely:
  508. //
  509. if(m_position == m_end)
  510. {
  511. this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
  512. return false;
  513. }
  514. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  515. return false;
  516. #ifndef BOOST_NO_STD_DISTANCE
  517. if(markid && (this->flags() & regbase::save_subexpression_location))
  518. this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
  519. #else
  520. if(markid && (this->flags() & regbase::save_subexpression_location))
  521. this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
  522. #endif
  523. ++m_position;
  524. //
  525. // append closing parenthesis state:
  526. //
  527. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  528. pb->index = markid;
  529. pb->icase = this->flags() & regbase::icase;
  530. this->m_paren_start = last_paren_start;
  531. //
  532. // restore the alternate insertion point:
  533. //
  534. this->m_alt_insert_point = last_alt_point;
  535. //
  536. // allow backrefs to this mark:
  537. //
  538. if(markid > 0)
  539. this->m_backrefs.set(markid);
  540. return true;
  541. }
  542. template <class charT, class traits>
  543. bool basic_regex_parser<charT, traits>::parse_basic_escape()
  544. {
  545. if(++m_position == m_end)
  546. {
  547. fail(regex_constants::error_paren, m_position - m_base);
  548. return false;
  549. }
  550. bool result = true;
  551. switch(this->m_traits.escape_syntax_type(*m_position))
  552. {
  553. case regex_constants::syntax_open_mark:
  554. return parse_open_paren();
  555. case regex_constants::syntax_close_mark:
  556. return false;
  557. case regex_constants::syntax_plus:
  558. if(this->flags() & regex_constants::bk_plus_qm)
  559. {
  560. ++m_position;
  561. return parse_repeat(1);
  562. }
  563. else
  564. return parse_literal();
  565. case regex_constants::syntax_question:
  566. if(this->flags() & regex_constants::bk_plus_qm)
  567. {
  568. ++m_position;
  569. return parse_repeat(0, 1);
  570. }
  571. else
  572. return parse_literal();
  573. case regex_constants::syntax_open_brace:
  574. if(this->flags() & regbase::no_intervals)
  575. return parse_literal();
  576. ++m_position;
  577. return parse_repeat_range(true);
  578. case regex_constants::syntax_close_brace:
  579. if(this->flags() & regbase::no_intervals)
  580. return parse_literal();
  581. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  582. return false;
  583. case regex_constants::syntax_or:
  584. if(this->flags() & regbase::bk_vbar)
  585. return parse_alt();
  586. else
  587. result = parse_literal();
  588. break;
  589. case regex_constants::syntax_digit:
  590. return parse_backref();
  591. case regex_constants::escape_type_start_buffer:
  592. if(this->flags() & regbase::emacs_ex)
  593. {
  594. ++m_position;
  595. this->append_state(syntax_element_buffer_start);
  596. }
  597. else
  598. result = parse_literal();
  599. break;
  600. case regex_constants::escape_type_end_buffer:
  601. if(this->flags() & regbase::emacs_ex)
  602. {
  603. ++m_position;
  604. this->append_state(syntax_element_buffer_end);
  605. }
  606. else
  607. result = parse_literal();
  608. break;
  609. case regex_constants::escape_type_word_assert:
  610. if(this->flags() & regbase::emacs_ex)
  611. {
  612. ++m_position;
  613. this->append_state(syntax_element_word_boundary);
  614. }
  615. else
  616. result = parse_literal();
  617. break;
  618. case regex_constants::escape_type_not_word_assert:
  619. if(this->flags() & regbase::emacs_ex)
  620. {
  621. ++m_position;
  622. this->append_state(syntax_element_within_word);
  623. }
  624. else
  625. result = parse_literal();
  626. break;
  627. case regex_constants::escape_type_left_word:
  628. if(this->flags() & regbase::emacs_ex)
  629. {
  630. ++m_position;
  631. this->append_state(syntax_element_word_start);
  632. }
  633. else
  634. result = parse_literal();
  635. break;
  636. case regex_constants::escape_type_right_word:
  637. if(this->flags() & regbase::emacs_ex)
  638. {
  639. ++m_position;
  640. this->append_state(syntax_element_word_end);
  641. }
  642. else
  643. result = parse_literal();
  644. break;
  645. default:
  646. if(this->flags() & regbase::emacs_ex)
  647. {
  648. bool negate = true;
  649. switch(*m_position)
  650. {
  651. case 'w':
  652. negate = false;
  653. BOOST_FALLTHROUGH;
  654. case 'W':
  655. {
  656. basic_char_set<charT, traits> char_set;
  657. if(negate)
  658. char_set.negate();
  659. char_set.add_class(this->m_word_mask);
  660. if(0 == this->append_set(char_set))
  661. {
  662. fail(regex_constants::error_ctype, m_position - m_base);
  663. return false;
  664. }
  665. ++m_position;
  666. return true;
  667. }
  668. case 's':
  669. negate = false;
  670. BOOST_FALLTHROUGH;
  671. case 'S':
  672. return add_emacs_code(negate);
  673. case 'c':
  674. case 'C':
  675. // not supported yet:
  676. fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
  677. return false;
  678. default:
  679. break;
  680. }
  681. }
  682. result = parse_literal();
  683. break;
  684. }
  685. return result;
  686. }
  687. template <class charT, class traits>
  688. bool basic_regex_parser<charT, traits>::parse_extended_escape()
  689. {
  690. ++m_position;
  691. if(m_position == m_end)
  692. {
  693. fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
  694. return false;
  695. }
  696. bool negate = false; // in case this is a character class escape: \w \d etc
  697. switch(this->m_traits.escape_syntax_type(*m_position))
  698. {
  699. case regex_constants::escape_type_not_class:
  700. negate = true;
  701. BOOST_FALLTHROUGH;
  702. case regex_constants::escape_type_class:
  703. {
  704. escape_type_class_jump:
  705. typedef typename traits::char_class_type m_type;
  706. m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  707. if(m != 0)
  708. {
  709. basic_char_set<charT, traits> char_set;
  710. if(negate)
  711. char_set.negate();
  712. char_set.add_class(m);
  713. if(0 == this->append_set(char_set))
  714. {
  715. fail(regex_constants::error_ctype, m_position - m_base);
  716. return false;
  717. }
  718. ++m_position;
  719. return true;
  720. }
  721. //
  722. // not a class, just a regular unknown escape:
  723. //
  724. this->append_literal(unescape_character());
  725. break;
  726. }
  727. case regex_constants::syntax_digit:
  728. return parse_backref();
  729. case regex_constants::escape_type_left_word:
  730. ++m_position;
  731. this->append_state(syntax_element_word_start);
  732. break;
  733. case regex_constants::escape_type_right_word:
  734. ++m_position;
  735. this->append_state(syntax_element_word_end);
  736. break;
  737. case regex_constants::escape_type_start_buffer:
  738. ++m_position;
  739. this->append_state(syntax_element_buffer_start);
  740. break;
  741. case regex_constants::escape_type_end_buffer:
  742. ++m_position;
  743. this->append_state(syntax_element_buffer_end);
  744. break;
  745. case regex_constants::escape_type_word_assert:
  746. ++m_position;
  747. this->append_state(syntax_element_word_boundary);
  748. break;
  749. case regex_constants::escape_type_not_word_assert:
  750. ++m_position;
  751. this->append_state(syntax_element_within_word);
  752. break;
  753. case regex_constants::escape_type_Z:
  754. ++m_position;
  755. this->append_state(syntax_element_soft_buffer_end);
  756. break;
  757. case regex_constants::escape_type_Q:
  758. return parse_QE();
  759. case regex_constants::escape_type_C:
  760. return parse_match_any();
  761. case regex_constants::escape_type_X:
  762. ++m_position;
  763. this->append_state(syntax_element_combining);
  764. break;
  765. case regex_constants::escape_type_G:
  766. ++m_position;
  767. this->append_state(syntax_element_restart_continue);
  768. break;
  769. case regex_constants::escape_type_not_property:
  770. negate = true;
  771. BOOST_FALLTHROUGH;
  772. case regex_constants::escape_type_property:
  773. {
  774. ++m_position;
  775. char_class_type m;
  776. if(m_position == m_end)
  777. {
  778. fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
  779. return false;
  780. }
  781. // maybe have \p{ddd}
  782. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  783. {
  784. const charT* base = m_position;
  785. // skip forward until we find enclosing brace:
  786. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  787. ++m_position;
  788. if(m_position == m_end)
  789. {
  790. fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
  791. return false;
  792. }
  793. m = this->m_traits.lookup_classname(++base, m_position++);
  794. }
  795. else
  796. {
  797. m = this->m_traits.lookup_classname(m_position, m_position+1);
  798. ++m_position;
  799. }
  800. if(m != 0)
  801. {
  802. basic_char_set<charT, traits> char_set;
  803. if(negate)
  804. char_set.negate();
  805. char_set.add_class(m);
  806. if(0 == this->append_set(char_set))
  807. {
  808. fail(regex_constants::error_ctype, m_position - m_base);
  809. return false;
  810. }
  811. return true;
  812. }
  813. fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
  814. return false;
  815. }
  816. case regex_constants::escape_type_reset_start_mark:
  817. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  818. {
  819. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  820. pb->index = -5;
  821. pb->icase = this->flags() & regbase::icase;
  822. this->m_pdata->m_data.align();
  823. ++m_position;
  824. return true;
  825. }
  826. goto escape_type_class_jump;
  827. case regex_constants::escape_type_line_ending:
  828. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  829. {
  830. const charT* e = get_escape_R_string<charT>();
  831. const charT* old_position = m_position;
  832. const charT* old_end = m_end;
  833. const charT* old_base = m_base;
  834. m_position = e;
  835. m_base = e;
  836. m_end = e + traits::length(e);
  837. bool r = parse_all();
  838. m_position = ++old_position;
  839. m_end = old_end;
  840. m_base = old_base;
  841. return r;
  842. }
  843. goto escape_type_class_jump;
  844. case regex_constants::escape_type_extended_backref:
  845. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  846. {
  847. bool have_brace = false;
  848. bool negative = false;
  849. static const char incomplete_message[] = "Incomplete \\g escape found.";
  850. if(++m_position == m_end)
  851. {
  852. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  853. return false;
  854. }
  855. // maybe have \g{ddd}
  856. regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
  857. regex_constants::syntax_type syn_end = 0;
  858. if((syn == regex_constants::syntax_open_brace)
  859. || (syn == regex_constants::escape_type_left_word)
  860. || (syn == regex_constants::escape_type_end_buffer))
  861. {
  862. if(++m_position == m_end)
  863. {
  864. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  865. return false;
  866. }
  867. have_brace = true;
  868. switch(syn)
  869. {
  870. case regex_constants::syntax_open_brace:
  871. syn_end = regex_constants::syntax_close_brace;
  872. break;
  873. case regex_constants::escape_type_left_word:
  874. syn_end = regex_constants::escape_type_right_word;
  875. break;
  876. default:
  877. syn_end = regex_constants::escape_type_end_buffer;
  878. break;
  879. }
  880. }
  881. negative = (*m_position == static_cast<charT>('-'));
  882. if((negative) && (++m_position == m_end))
  883. {
  884. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  885. return false;
  886. }
  887. const charT* pc = m_position;
  888. boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
  889. if((i < 0) && syn_end)
  890. {
  891. // Check for a named capture, get the leftmost one if there is more than one:
  892. const charT* base = m_position;
  893. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
  894. {
  895. ++m_position;
  896. }
  897. i = hash_value_from_capture_name(base, m_position);
  898. pc = m_position;
  899. }
  900. if(negative)
  901. i = 1 + (static_cast<boost::intmax_t>(m_mark_count) - i);
  902. if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test(i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id(i)))))
  903. {
  904. m_position = pc;
  905. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  906. pb->index = i;
  907. pb->icase = this->flags() & regbase::icase;
  908. }
  909. else
  910. {
  911. fail(regex_constants::error_backref, m_position - m_base);
  912. return false;
  913. }
  914. m_position = pc;
  915. if(have_brace)
  916. {
  917. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
  918. {
  919. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  920. return false;
  921. }
  922. ++m_position;
  923. }
  924. return true;
  925. }
  926. goto escape_type_class_jump;
  927. case regex_constants::escape_type_control_v:
  928. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  929. goto escape_type_class_jump;
  930. BOOST_FALLTHROUGH;
  931. default:
  932. this->append_literal(unescape_character());
  933. break;
  934. }
  935. return true;
  936. }
  937. template <class charT, class traits>
  938. bool basic_regex_parser<charT, traits>::parse_match_any()
  939. {
  940. //
  941. // we have a '.' that can match any character:
  942. //
  943. ++m_position;
  944. static_cast<re_dot*>(
  945. this->append_state(syntax_element_wild, sizeof(re_dot))
  946. )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
  947. ? BOOST_REGEX_DETAIL_NS::force_not_newline
  948. : this->flags() & regbase::mod_s ?
  949. BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
  950. return true;
  951. }
  952. template <class charT, class traits>
  953. bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
  954. {
  955. bool greedy = true;
  956. bool possessive = false;
  957. std::size_t insert_point;
  958. //
  959. // when we get to here we may have a non-greedy ? mark still to come:
  960. //
  961. if((m_position != m_end)
  962. && (
  963. (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  964. || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
  965. )
  966. )
  967. {
  968. // OK we have a perl or emacs regex, check for a '?':
  969. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  970. {
  971. // whitespace skip:
  972. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  973. ++m_position;
  974. }
  975. if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
  976. {
  977. greedy = false;
  978. ++m_position;
  979. }
  980. // for perl regexes only check for possessive ++ repeats.
  981. if((m_position != m_end)
  982. && (0 == (this->flags() & regbase::main_option_type))
  983. && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
  984. {
  985. possessive = true;
  986. ++m_position;
  987. }
  988. }
  989. if(0 == this->m_last_state)
  990. {
  991. fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
  992. return false;
  993. }
  994. if(this->m_last_state->type == syntax_element_endmark)
  995. {
  996. // insert a repeat before the '(' matching the last ')':
  997. insert_point = this->m_paren_start;
  998. }
  999. else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
  1000. {
  1001. // the last state was a literal with more than one character, split it in two:
  1002. re_literal* lit = static_cast<re_literal*>(this->m_last_state);
  1003. charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
  1004. lit->length -= 1;
  1005. // now append new state:
  1006. lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
  1007. lit->length = 1;
  1008. (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
  1009. insert_point = this->getoffset(this->m_last_state);
  1010. }
  1011. else
  1012. {
  1013. // repeat the last state whatever it was, need to add some error checking here:
  1014. switch(this->m_last_state->type)
  1015. {
  1016. case syntax_element_start_line:
  1017. case syntax_element_end_line:
  1018. case syntax_element_word_boundary:
  1019. case syntax_element_within_word:
  1020. case syntax_element_word_start:
  1021. case syntax_element_word_end:
  1022. case syntax_element_buffer_start:
  1023. case syntax_element_buffer_end:
  1024. case syntax_element_alt:
  1025. case syntax_element_soft_buffer_end:
  1026. case syntax_element_restart_continue:
  1027. case syntax_element_jump:
  1028. case syntax_element_startmark:
  1029. case syntax_element_backstep:
  1030. case syntax_element_toggle_case:
  1031. // can't legally repeat any of the above:
  1032. fail(regex_constants::error_badrepeat, m_position - m_base);
  1033. return false;
  1034. default:
  1035. // do nothing...
  1036. break;
  1037. }
  1038. insert_point = this->getoffset(this->m_last_state);
  1039. }
  1040. //
  1041. // OK we now know what to repeat, so insert the repeat around it:
  1042. //
  1043. re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
  1044. rep->min = low;
  1045. rep->max = high;
  1046. rep->greedy = greedy;
  1047. rep->leading = false;
  1048. // store our repeater position for later:
  1049. std::ptrdiff_t rep_off = this->getoffset(rep);
  1050. // and append a back jump to the repeat:
  1051. re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1052. jmp->alt.i = rep_off - this->getoffset(jmp);
  1053. this->m_pdata->m_data.align();
  1054. // now fill in the alt jump for the repeat:
  1055. rep = static_cast<re_repeat*>(this->getaddress(rep_off));
  1056. rep->alt.i = this->m_pdata->m_data.size() - rep_off;
  1057. //
  1058. // If the repeat is possessive then bracket the repeat with a (?>...)
  1059. // independent sub-expression construct:
  1060. //
  1061. if(possessive)
  1062. {
  1063. if(m_position != m_end)
  1064. {
  1065. //
  1066. // Check for illegal following quantifier, we have to do this here, because
  1067. // the extra states we insert below circumvents our usual error checking :-(
  1068. //
  1069. bool contin = false;
  1070. do
  1071. {
  1072. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  1073. {
  1074. // whitespace skip:
  1075. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1076. ++m_position;
  1077. }
  1078. if (m_position != m_end)
  1079. {
  1080. switch (this->m_traits.syntax_type(*m_position))
  1081. {
  1082. case regex_constants::syntax_star:
  1083. case regex_constants::syntax_plus:
  1084. case regex_constants::syntax_question:
  1085. case regex_constants::syntax_open_brace:
  1086. fail(regex_constants::error_badrepeat, m_position - m_base);
  1087. return false;
  1088. case regex_constants::syntax_open_mark:
  1089. // Do we have a comment? If so we need to skip it here...
  1090. if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
  1091. && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
  1092. {
  1093. while ((m_position != m_end)
  1094. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
  1095. }
  1096. contin = true;
  1097. }
  1098. else
  1099. contin = false;
  1100. break;
  1101. default:
  1102. contin = false;
  1103. }
  1104. }
  1105. else
  1106. contin = false;
  1107. } while (contin);
  1108. }
  1109. re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
  1110. pb->index = -3;
  1111. pb->icase = this->flags() & regbase::icase;
  1112. jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
  1113. this->m_pdata->m_data.align();
  1114. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  1115. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  1116. pb->index = -3;
  1117. pb->icase = this->flags() & regbase::icase;
  1118. }
  1119. return true;
  1120. }
  1121. template <class charT, class traits>
  1122. bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
  1123. {
  1124. static const char incomplete_message[] = "Missing } in quantified repetition.";
  1125. //
  1126. // parse a repeat-range:
  1127. //
  1128. std::size_t min, max;
  1129. boost::intmax_t v;
  1130. // skip whitespace:
  1131. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1132. ++m_position;
  1133. if(this->m_position == this->m_end)
  1134. {
  1135. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1136. {
  1137. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1138. return false;
  1139. }
  1140. // Treat the opening '{' as a literal character, rewind to start of error:
  1141. --m_position;
  1142. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1143. return parse_literal();
  1144. }
  1145. // get min:
  1146. v = this->m_traits.toi(m_position, m_end, 10);
  1147. // skip whitespace:
  1148. if((v < 0) || (v > umax()))
  1149. {
  1150. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1151. {
  1152. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1153. return false;
  1154. }
  1155. // Treat the opening '{' as a literal character, rewind to start of error:
  1156. --m_position;
  1157. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1158. return parse_literal();
  1159. }
  1160. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1161. ++m_position;
  1162. if(this->m_position == this->m_end)
  1163. {
  1164. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1165. {
  1166. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1167. return false;
  1168. }
  1169. // Treat the opening '{' as a literal character, rewind to start of error:
  1170. --m_position;
  1171. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1172. return parse_literal();
  1173. }
  1174. min = static_cast<std::size_t>(v);
  1175. // see if we have a comma:
  1176. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
  1177. {
  1178. // move on and error check:
  1179. ++m_position;
  1180. // skip whitespace:
  1181. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1182. ++m_position;
  1183. if(this->m_position == this->m_end)
  1184. {
  1185. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1186. {
  1187. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1188. return false;
  1189. }
  1190. // Treat the opening '{' as a literal character, rewind to start of error:
  1191. --m_position;
  1192. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1193. return parse_literal();
  1194. }
  1195. // get the value if any:
  1196. v = this->m_traits.toi(m_position, m_end, 10);
  1197. max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
  1198. }
  1199. else
  1200. {
  1201. // no comma, max = min:
  1202. max = min;
  1203. }
  1204. // skip whitespace:
  1205. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1206. ++m_position;
  1207. // OK now check trailing }:
  1208. if(this->m_position == this->m_end)
  1209. {
  1210. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1211. {
  1212. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1213. return false;
  1214. }
  1215. // Treat the opening '{' as a literal character, rewind to start of error:
  1216. --m_position;
  1217. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1218. return parse_literal();
  1219. }
  1220. if(isbasic)
  1221. {
  1222. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
  1223. {
  1224. ++m_position;
  1225. if(this->m_position == this->m_end)
  1226. {
  1227. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1228. return false;
  1229. }
  1230. }
  1231. else
  1232. {
  1233. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1234. return false;
  1235. }
  1236. }
  1237. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
  1238. ++m_position;
  1239. else
  1240. {
  1241. // Treat the opening '{' as a literal character, rewind to start of error:
  1242. --m_position;
  1243. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1244. return parse_literal();
  1245. }
  1246. //
  1247. // finally go and add the repeat, unless error:
  1248. //
  1249. if(min > max)
  1250. {
  1251. // Backtrack to error location:
  1252. m_position -= 2;
  1253. while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
  1254. ++m_position;
  1255. fail(regex_constants::error_badbrace, m_position - m_base);
  1256. return false;
  1257. }
  1258. return parse_repeat(min, max);
  1259. }
  1260. template <class charT, class traits>
  1261. bool basic_regex_parser<charT, traits>::parse_alt()
  1262. {
  1263. //
  1264. // error check: if there have been no previous states,
  1265. // or if the last state was a '(' then error:
  1266. //
  1267. if(
  1268. ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
  1269. &&
  1270. !(
  1271. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  1272. &&
  1273. ((this->flags() & regbase::no_empty_expressions) == 0)
  1274. )
  1275. )
  1276. {
  1277. fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
  1278. return false;
  1279. }
  1280. //
  1281. // Reset mark count if required:
  1282. //
  1283. if(m_max_mark < m_mark_count)
  1284. m_max_mark = m_mark_count;
  1285. if(m_mark_reset >= 0)
  1286. m_mark_count = m_mark_reset;
  1287. ++m_position;
  1288. //
  1289. // we need to append a trailing jump:
  1290. //
  1291. re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
  1292. std::ptrdiff_t jump_offset = this->getoffset(pj);
  1293. //
  1294. // now insert the alternative:
  1295. //
  1296. re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
  1297. jump_offset += re_alt_size;
  1298. this->m_pdata->m_data.align();
  1299. palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
  1300. //
  1301. // update m_alt_insert_point so that the next alternate gets
  1302. // inserted at the start of the second of the two we've just created:
  1303. //
  1304. this->m_alt_insert_point = this->m_pdata->m_data.size();
  1305. //
  1306. // the start of this alternative must have a case changes state
  1307. // if the current block has messed around with case changes:
  1308. //
  1309. if(m_has_case_change)
  1310. {
  1311. static_cast<re_case*>(
  1312. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  1313. )->icase = this->m_icase;
  1314. }
  1315. //
  1316. // push the alternative onto our stack, a recursive
  1317. // implementation here is easier to understand (and faster
  1318. // as it happens), but causes all kinds of stack overflow problems
  1319. // on programs with small stacks (COM+).
  1320. //
  1321. m_alt_jumps.push_back(jump_offset);
  1322. return true;
  1323. }
  1324. template <class charT, class traits>
  1325. bool basic_regex_parser<charT, traits>::parse_set()
  1326. {
  1327. static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1328. ++m_position;
  1329. if(m_position == m_end)
  1330. {
  1331. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1332. return false;
  1333. }
  1334. basic_char_set<charT, traits> char_set;
  1335. const charT* base = m_position; // where the '[' was
  1336. const charT* item_base = m_position; // where the '[' or '^' was
  1337. while(m_position != m_end)
  1338. {
  1339. switch(this->m_traits.syntax_type(*m_position))
  1340. {
  1341. case regex_constants::syntax_caret:
  1342. if(m_position == base)
  1343. {
  1344. char_set.negate();
  1345. ++m_position;
  1346. item_base = m_position;
  1347. }
  1348. else
  1349. parse_set_literal(char_set);
  1350. break;
  1351. case regex_constants::syntax_close_set:
  1352. if(m_position == item_base)
  1353. {
  1354. parse_set_literal(char_set);
  1355. break;
  1356. }
  1357. else
  1358. {
  1359. ++m_position;
  1360. if(0 == this->append_set(char_set))
  1361. {
  1362. fail(regex_constants::error_ctype, m_position - m_base);
  1363. return false;
  1364. }
  1365. }
  1366. return true;
  1367. case regex_constants::syntax_open_set:
  1368. if(parse_inner_set(char_set))
  1369. break;
  1370. return true;
  1371. case regex_constants::syntax_escape:
  1372. {
  1373. //
  1374. // look ahead and see if this is a character class shortcut
  1375. // \d \w \s etc...
  1376. //
  1377. ++m_position;
  1378. if(this->m_traits.escape_syntax_type(*m_position)
  1379. == regex_constants::escape_type_class)
  1380. {
  1381. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1382. if(m != 0)
  1383. {
  1384. char_set.add_class(m);
  1385. ++m_position;
  1386. break;
  1387. }
  1388. }
  1389. else if(this->m_traits.escape_syntax_type(*m_position)
  1390. == regex_constants::escape_type_not_class)
  1391. {
  1392. // negated character class:
  1393. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1394. if(m != 0)
  1395. {
  1396. char_set.add_negated_class(m);
  1397. ++m_position;
  1398. break;
  1399. }
  1400. }
  1401. // not a character class, just a regular escape:
  1402. --m_position;
  1403. parse_set_literal(char_set);
  1404. break;
  1405. }
  1406. default:
  1407. parse_set_literal(char_set);
  1408. break;
  1409. }
  1410. }
  1411. return m_position != m_end;
  1412. }
  1413. template <class charT, class traits>
  1414. bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
  1415. {
  1416. static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1417. //
  1418. // we have either a character class [:name:]
  1419. // a collating element [.name.]
  1420. // or an equivalence class [=name=]
  1421. //
  1422. if(m_end == ++m_position)
  1423. {
  1424. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1425. return false;
  1426. }
  1427. switch(this->m_traits.syntax_type(*m_position))
  1428. {
  1429. case regex_constants::syntax_dot:
  1430. //
  1431. // a collating element is treated as a literal:
  1432. //
  1433. --m_position;
  1434. parse_set_literal(char_set);
  1435. return true;
  1436. case regex_constants::syntax_colon:
  1437. {
  1438. // check that character classes are actually enabled:
  1439. if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
  1440. == (regbase::basic_syntax_group | regbase::no_char_classes))
  1441. {
  1442. --m_position;
  1443. parse_set_literal(char_set);
  1444. return true;
  1445. }
  1446. // skip the ':'
  1447. if(m_end == ++m_position)
  1448. {
  1449. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1450. return false;
  1451. }
  1452. const charT* name_first = m_position;
  1453. // skip at least one character, then find the matching ':]'
  1454. if(m_end == ++m_position)
  1455. {
  1456. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1457. return false;
  1458. }
  1459. while((m_position != m_end)
  1460. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
  1461. ++m_position;
  1462. const charT* name_last = m_position;
  1463. if(m_end == m_position)
  1464. {
  1465. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1466. return false;
  1467. }
  1468. if((m_end == ++m_position)
  1469. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1470. {
  1471. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1472. return false;
  1473. }
  1474. //
  1475. // check for negated class:
  1476. //
  1477. bool negated = false;
  1478. if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
  1479. {
  1480. ++name_first;
  1481. negated = true;
  1482. }
  1483. typedef typename traits::char_class_type m_type;
  1484. m_type m = this->m_traits.lookup_classname(name_first, name_last);
  1485. if(m == 0)
  1486. {
  1487. if(char_set.empty() && (name_last - name_first == 1))
  1488. {
  1489. // maybe a special case:
  1490. ++m_position;
  1491. if( (m_position != m_end)
  1492. && (this->m_traits.syntax_type(*m_position)
  1493. == regex_constants::syntax_close_set))
  1494. {
  1495. if(this->m_traits.escape_syntax_type(*name_first)
  1496. == regex_constants::escape_type_left_word)
  1497. {
  1498. ++m_position;
  1499. this->append_state(syntax_element_word_start);
  1500. return false;
  1501. }
  1502. if(this->m_traits.escape_syntax_type(*name_first)
  1503. == regex_constants::escape_type_right_word)
  1504. {
  1505. ++m_position;
  1506. this->append_state(syntax_element_word_end);
  1507. return false;
  1508. }
  1509. }
  1510. }
  1511. fail(regex_constants::error_ctype, name_first - m_base);
  1512. return false;
  1513. }
  1514. if(!negated)
  1515. char_set.add_class(m);
  1516. else
  1517. char_set.add_negated_class(m);
  1518. ++m_position;
  1519. break;
  1520. }
  1521. case regex_constants::syntax_equal:
  1522. {
  1523. // skip the '='
  1524. if(m_end == ++m_position)
  1525. {
  1526. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1527. return false;
  1528. }
  1529. const charT* name_first = m_position;
  1530. // skip at least one character, then find the matching '=]'
  1531. if(m_end == ++m_position)
  1532. {
  1533. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1534. return false;
  1535. }
  1536. while((m_position != m_end)
  1537. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
  1538. ++m_position;
  1539. const charT* name_last = m_position;
  1540. if(m_end == m_position)
  1541. {
  1542. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1543. return false;
  1544. }
  1545. if((m_end == ++m_position)
  1546. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1547. {
  1548. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1549. return false;
  1550. }
  1551. string_type m = this->m_traits.lookup_collatename(name_first, name_last);
  1552. if(m.empty() || (m.size() > 2))
  1553. {
  1554. fail(regex_constants::error_collate, name_first - m_base);
  1555. return false;
  1556. }
  1557. digraph<charT> d;
  1558. d.first = m[0];
  1559. if(m.size() > 1)
  1560. d.second = m[1];
  1561. else
  1562. d.second = 0;
  1563. char_set.add_equivalent(d);
  1564. ++m_position;
  1565. break;
  1566. }
  1567. default:
  1568. --m_position;
  1569. parse_set_literal(char_set);
  1570. break;
  1571. }
  1572. return true;
  1573. }
  1574. template <class charT, class traits>
  1575. void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
  1576. {
  1577. digraph<charT> start_range(get_next_set_literal(char_set));
  1578. if(m_end == m_position)
  1579. {
  1580. fail(regex_constants::error_brack, m_position - m_base);
  1581. return;
  1582. }
  1583. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1584. {
  1585. // we have a range:
  1586. if(m_end == ++m_position)
  1587. {
  1588. fail(regex_constants::error_brack, m_position - m_base);
  1589. return;
  1590. }
  1591. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
  1592. {
  1593. digraph<charT> end_range = get_next_set_literal(char_set);
  1594. char_set.add_range(start_range, end_range);
  1595. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1596. {
  1597. if(m_end == ++m_position)
  1598. {
  1599. fail(regex_constants::error_brack, m_position - m_base);
  1600. return;
  1601. }
  1602. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
  1603. {
  1604. // trailing - :
  1605. --m_position;
  1606. return;
  1607. }
  1608. fail(regex_constants::error_range, m_position - m_base);
  1609. return;
  1610. }
  1611. return;
  1612. }
  1613. --m_position;
  1614. }
  1615. char_set.add_single(start_range);
  1616. }
  1617. template <class charT, class traits>
  1618. digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
  1619. {
  1620. digraph<charT> result;
  1621. switch(this->m_traits.syntax_type(*m_position))
  1622. {
  1623. case regex_constants::syntax_dash:
  1624. if(!char_set.empty())
  1625. {
  1626. // see if we are at the end of the set:
  1627. if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1628. {
  1629. fail(regex_constants::error_range, m_position - m_base);
  1630. return result;
  1631. }
  1632. --m_position;
  1633. }
  1634. result.first = *m_position++;
  1635. return result;
  1636. case regex_constants::syntax_escape:
  1637. // check to see if escapes are supported first:
  1638. if(this->flags() & regex_constants::no_escape_in_lists)
  1639. {
  1640. result = *m_position++;
  1641. break;
  1642. }
  1643. ++m_position;
  1644. result = unescape_character();
  1645. break;
  1646. case regex_constants::syntax_open_set:
  1647. {
  1648. if(m_end == ++m_position)
  1649. {
  1650. fail(regex_constants::error_collate, m_position - m_base);
  1651. return result;
  1652. }
  1653. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
  1654. {
  1655. --m_position;
  1656. result.first = *m_position;
  1657. ++m_position;
  1658. return result;
  1659. }
  1660. if(m_end == ++m_position)
  1661. {
  1662. fail(regex_constants::error_collate, m_position - m_base);
  1663. return result;
  1664. }
  1665. const charT* name_first = m_position;
  1666. // skip at least one character, then find the matching ':]'
  1667. if(m_end == ++m_position)
  1668. {
  1669. fail(regex_constants::error_collate, name_first - m_base);
  1670. return result;
  1671. }
  1672. while((m_position != m_end)
  1673. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
  1674. ++m_position;
  1675. const charT* name_last = m_position;
  1676. if(m_end == m_position)
  1677. {
  1678. fail(regex_constants::error_collate, name_first - m_base);
  1679. return result;
  1680. }
  1681. if((m_end == ++m_position)
  1682. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1683. {
  1684. fail(regex_constants::error_collate, name_first - m_base);
  1685. return result;
  1686. }
  1687. ++m_position;
  1688. string_type s = this->m_traits.lookup_collatename(name_first, name_last);
  1689. if(s.empty() || (s.size() > 2))
  1690. {
  1691. fail(regex_constants::error_collate, name_first - m_base);
  1692. return result;
  1693. }
  1694. result.first = s[0];
  1695. if(s.size() > 1)
  1696. result.second = s[1];
  1697. else
  1698. result.second = 0;
  1699. return result;
  1700. }
  1701. default:
  1702. result = *m_position++;
  1703. }
  1704. return result;
  1705. }
  1706. //
  1707. // does a value fit in the specified charT type?
  1708. //
  1709. template <class charT>
  1710. bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
  1711. {
  1712. return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
  1713. }
  1714. template <class charT>
  1715. bool valid_value(charT, boost::intmax_t, const mpl::false_&)
  1716. {
  1717. return true; // v will alsways fit in a charT
  1718. }
  1719. template <class charT>
  1720. bool valid_value(charT c, boost::intmax_t v)
  1721. {
  1722. return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
  1723. }
  1724. template <class charT, class traits>
  1725. charT basic_regex_parser<charT, traits>::unescape_character()
  1726. {
  1727. #ifdef BOOST_MSVC
  1728. #pragma warning(push)
  1729. #pragma warning(disable:4127)
  1730. #endif
  1731. charT result(0);
  1732. if(m_position == m_end)
  1733. {
  1734. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
  1735. return false;
  1736. }
  1737. switch(this->m_traits.escape_syntax_type(*m_position))
  1738. {
  1739. case regex_constants::escape_type_control_a:
  1740. result = charT('\a');
  1741. break;
  1742. case regex_constants::escape_type_e:
  1743. result = charT(27);
  1744. break;
  1745. case regex_constants::escape_type_control_f:
  1746. result = charT('\f');
  1747. break;
  1748. case regex_constants::escape_type_control_n:
  1749. result = charT('\n');
  1750. break;
  1751. case regex_constants::escape_type_control_r:
  1752. result = charT('\r');
  1753. break;
  1754. case regex_constants::escape_type_control_t:
  1755. result = charT('\t');
  1756. break;
  1757. case regex_constants::escape_type_control_v:
  1758. result = charT('\v');
  1759. break;
  1760. case regex_constants::escape_type_word_assert:
  1761. result = charT('\b');
  1762. break;
  1763. case regex_constants::escape_type_ascii_control:
  1764. ++m_position;
  1765. if(m_position == m_end)
  1766. {
  1767. // Rewind to start of escape:
  1768. --m_position;
  1769. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1770. fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
  1771. return result;
  1772. }
  1773. result = static_cast<charT>(*m_position % 32);
  1774. break;
  1775. case regex_constants::escape_type_hex:
  1776. ++m_position;
  1777. if(m_position == m_end)
  1778. {
  1779. // Rewind to start of escape:
  1780. --m_position;
  1781. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1782. fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
  1783. return result;
  1784. }
  1785. // maybe have \x{ddd}
  1786. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1787. {
  1788. ++m_position;
  1789. if(m_position == m_end)
  1790. {
  1791. // Rewind to start of escape:
  1792. --m_position;
  1793. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1794. fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
  1795. return result;
  1796. }
  1797. boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
  1798. if((m_position == m_end)
  1799. || (i < 0)
  1800. || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
  1801. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1802. {
  1803. // Rewind to start of escape:
  1804. --m_position;
  1805. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1806. fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
  1807. return result;
  1808. }
  1809. ++m_position;
  1810. result = charT(i);
  1811. }
  1812. else
  1813. {
  1814. std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
  1815. boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
  1816. if((i < 0)
  1817. || !valid_value(charT(0), i))
  1818. {
  1819. // Rewind to start of escape:
  1820. --m_position;
  1821. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1822. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
  1823. return result;
  1824. }
  1825. result = charT(i);
  1826. }
  1827. return result;
  1828. case regex_constants::syntax_digit:
  1829. {
  1830. // an octal escape sequence, the first character must be a zero
  1831. // followed by up to 3 octal digits:
  1832. std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
  1833. const charT* bp = m_position;
  1834. boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
  1835. if(val != 0)
  1836. {
  1837. // Rewind to start of escape:
  1838. --m_position;
  1839. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1840. // Oops not an octal escape after all:
  1841. fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
  1842. return result;
  1843. }
  1844. val = this->m_traits.toi(m_position, m_position + len, 8);
  1845. if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
  1846. {
  1847. // Rewind to start of escape:
  1848. --m_position;
  1849. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1850. fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
  1851. return result;
  1852. }
  1853. return static_cast<charT>(val);
  1854. }
  1855. case regex_constants::escape_type_named_char:
  1856. {
  1857. ++m_position;
  1858. if(m_position == m_end)
  1859. {
  1860. // Rewind to start of escape:
  1861. --m_position;
  1862. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1863. fail(regex_constants::error_escape, m_position - m_base);
  1864. return false;
  1865. }
  1866. // maybe have \N{name}
  1867. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1868. {
  1869. const charT* base = m_position;
  1870. // skip forward until we find enclosing brace:
  1871. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1872. ++m_position;
  1873. if(m_position == m_end)
  1874. {
  1875. // Rewind to start of escape:
  1876. --m_position;
  1877. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1878. fail(regex_constants::error_escape, m_position - m_base);
  1879. return false;
  1880. }
  1881. string_type s = this->m_traits.lookup_collatename(++base, m_position++);
  1882. if(s.empty())
  1883. {
  1884. // Rewind to start of escape:
  1885. --m_position;
  1886. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1887. fail(regex_constants::error_collate, m_position - m_base);
  1888. return false;
  1889. }
  1890. if(s.size() == 1)
  1891. {
  1892. return s[0];
  1893. }
  1894. }
  1895. // fall through is a failure:
  1896. // Rewind to start of escape:
  1897. --m_position;
  1898. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1899. fail(regex_constants::error_escape, m_position - m_base);
  1900. return false;
  1901. }
  1902. default:
  1903. result = *m_position;
  1904. break;
  1905. }
  1906. ++m_position;
  1907. return result;
  1908. #ifdef BOOST_MSVC
  1909. #pragma warning(pop)
  1910. #endif
  1911. }
  1912. template <class charT, class traits>
  1913. bool basic_regex_parser<charT, traits>::parse_backref()
  1914. {
  1915. BOOST_REGEX_ASSERT(m_position != m_end);
  1916. const charT* pc = m_position;
  1917. boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
  1918. if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
  1919. {
  1920. // not a backref at all but an octal escape sequence:
  1921. charT c = unescape_character();
  1922. this->append_literal(c);
  1923. }
  1924. else if((i > 0) && (this->m_backrefs.test(i)))
  1925. {
  1926. m_position = pc;
  1927. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  1928. pb->index = i;
  1929. pb->icase = this->flags() & regbase::icase;
  1930. }
  1931. else
  1932. {
  1933. // Rewind to start of escape:
  1934. --m_position;
  1935. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1936. fail(regex_constants::error_backref, m_position - m_base);
  1937. return false;
  1938. }
  1939. return true;
  1940. }
  1941. template <class charT, class traits>
  1942. bool basic_regex_parser<charT, traits>::parse_QE()
  1943. {
  1944. #ifdef BOOST_MSVC
  1945. #pragma warning(push)
  1946. #pragma warning(disable:4127)
  1947. #endif
  1948. //
  1949. // parse a \Q...\E sequence:
  1950. //
  1951. ++m_position; // skip the Q
  1952. const charT* start = m_position;
  1953. const charT* end;
  1954. do
  1955. {
  1956. while((m_position != m_end)
  1957. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
  1958. ++m_position;
  1959. if(m_position == m_end)
  1960. {
  1961. // a \Q...\E sequence may terminate with the end of the expression:
  1962. end = m_position;
  1963. break;
  1964. }
  1965. if(++m_position == m_end) // skip the escape
  1966. {
  1967. fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
  1968. return false;
  1969. }
  1970. // check to see if it's a \E:
  1971. if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
  1972. {
  1973. ++m_position;
  1974. end = m_position - 2;
  1975. break;
  1976. }
  1977. // otherwise go round again:
  1978. }while(true);
  1979. //
  1980. // now add all the character between the two escapes as literals:
  1981. //
  1982. while(start != end)
  1983. {
  1984. this->append_literal(*start);
  1985. ++start;
  1986. }
  1987. return true;
  1988. #ifdef BOOST_MSVC
  1989. #pragma warning(pop)
  1990. #endif
  1991. }
  1992. template <class charT, class traits>
  1993. bool basic_regex_parser<charT, traits>::parse_perl_extension()
  1994. {
  1995. if(++m_position == m_end)
  1996. {
  1997. // Rewind to start of (? sequence:
  1998. --m_position;
  1999. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2000. fail(regex_constants::error_perl_extension, m_position - m_base);
  2001. return false;
  2002. }
  2003. //
  2004. // treat comments as a special case, as these
  2005. // are the only ones that don't start with a leading
  2006. // startmark state:
  2007. //
  2008. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
  2009. {
  2010. while((m_position != m_end)
  2011. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
  2012. {}
  2013. return true;
  2014. }
  2015. //
  2016. // backup some state, and prepare the way:
  2017. //
  2018. int markid = 0;
  2019. std::ptrdiff_t jump_offset = 0;
  2020. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  2021. pb->icase = this->flags() & regbase::icase;
  2022. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  2023. // back up insertion point for alternations, and set new point:
  2024. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  2025. this->m_pdata->m_data.align();
  2026. m_alt_insert_point = this->m_pdata->m_data.size();
  2027. std::ptrdiff_t expected_alt_point = m_alt_insert_point;
  2028. bool restore_flags = true;
  2029. regex_constants::syntax_option_type old_flags = this->flags();
  2030. bool old_case_change = m_has_case_change;
  2031. m_has_case_change = false;
  2032. charT name_delim;
  2033. int mark_reset = m_mark_reset;
  2034. int max_mark = m_max_mark;
  2035. m_mark_reset = -1;
  2036. m_max_mark = m_mark_count;
  2037. boost::intmax_t v;
  2038. //
  2039. // select the actual extension used:
  2040. //
  2041. switch(this->m_traits.syntax_type(*m_position))
  2042. {
  2043. case regex_constants::syntax_or:
  2044. m_mark_reset = m_mark_count;
  2045. BOOST_FALLTHROUGH;
  2046. case regex_constants::syntax_colon:
  2047. //
  2048. // a non-capturing mark:
  2049. //
  2050. pb->index = markid = 0;
  2051. ++m_position;
  2052. break;
  2053. case regex_constants::syntax_digit:
  2054. {
  2055. //
  2056. // a recursive subexpression:
  2057. //
  2058. v = this->m_traits.toi(m_position, m_end, 10);
  2059. if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2060. {
  2061. // Rewind to start of (? sequence:
  2062. --m_position;
  2063. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2064. fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
  2065. return false;
  2066. }
  2067. insert_recursion:
  2068. pb->index = markid = 0;
  2069. re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
  2070. pr->alt.i = v;
  2071. pr->state_id = 0;
  2072. static_cast<re_case*>(
  2073. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2074. )->icase = this->flags() & regbase::icase;
  2075. break;
  2076. }
  2077. case regex_constants::syntax_plus:
  2078. //
  2079. // A forward-relative recursive subexpression:
  2080. //
  2081. ++m_position;
  2082. v = this->m_traits.toi(m_position, m_end, 10);
  2083. if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2084. {
  2085. // Rewind to start of (? sequence:
  2086. --m_position;
  2087. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2088. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2089. return false;
  2090. }
  2091. if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
  2092. {
  2093. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2094. return false;
  2095. }
  2096. v += m_mark_count;
  2097. goto insert_recursion;
  2098. case regex_constants::syntax_dash:
  2099. //
  2100. // Possibly a backward-relative recursive subexpression:
  2101. //
  2102. ++m_position;
  2103. v = this->m_traits.toi(m_position, m_end, 10);
  2104. if(v <= 0)
  2105. {
  2106. --m_position;
  2107. // Oops not a relative recursion at all, but a (?-imsx) group:
  2108. goto option_group_jump;
  2109. }
  2110. v = static_cast<boost::intmax_t>(m_mark_count) + 1 - v;
  2111. if(v <= 0)
  2112. {
  2113. // Rewind to start of (? sequence:
  2114. --m_position;
  2115. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2116. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2117. return false;
  2118. }
  2119. goto insert_recursion;
  2120. case regex_constants::syntax_equal:
  2121. pb->index = markid = -1;
  2122. ++m_position;
  2123. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2124. this->m_pdata->m_data.align();
  2125. m_alt_insert_point = this->m_pdata->m_data.size();
  2126. break;
  2127. case regex_constants::syntax_not:
  2128. pb->index = markid = -2;
  2129. ++m_position;
  2130. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2131. this->m_pdata->m_data.align();
  2132. m_alt_insert_point = this->m_pdata->m_data.size();
  2133. break;
  2134. case regex_constants::escape_type_left_word:
  2135. {
  2136. // a lookbehind assertion:
  2137. if(++m_position == m_end)
  2138. {
  2139. // Rewind to start of (? sequence:
  2140. --m_position;
  2141. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2142. fail(regex_constants::error_perl_extension, m_position - m_base);
  2143. return false;
  2144. }
  2145. regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
  2146. if(t == regex_constants::syntax_not)
  2147. pb->index = markid = -2;
  2148. else if(t == regex_constants::syntax_equal)
  2149. pb->index = markid = -1;
  2150. else
  2151. {
  2152. // Probably a named capture which also starts (?< :
  2153. name_delim = '>';
  2154. --m_position;
  2155. goto named_capture_jump;
  2156. }
  2157. ++m_position;
  2158. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2159. this->append_state(syntax_element_backstep, sizeof(re_brace));
  2160. this->m_pdata->m_data.align();
  2161. m_alt_insert_point = this->m_pdata->m_data.size();
  2162. break;
  2163. }
  2164. case regex_constants::escape_type_right_word:
  2165. //
  2166. // an independent sub-expression:
  2167. //
  2168. pb->index = markid = -3;
  2169. ++m_position;
  2170. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2171. this->m_pdata->m_data.align();
  2172. m_alt_insert_point = this->m_pdata->m_data.size();
  2173. break;
  2174. case regex_constants::syntax_open_mark:
  2175. {
  2176. // a conditional expression:
  2177. pb->index = markid = -4;
  2178. if(++m_position == m_end)
  2179. {
  2180. // Rewind to start of (? sequence:
  2181. --m_position;
  2182. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2183. fail(regex_constants::error_perl_extension, m_position - m_base);
  2184. return false;
  2185. }
  2186. v = this->m_traits.toi(m_position, m_end, 10);
  2187. if(m_position == m_end)
  2188. {
  2189. // Rewind to start of (? sequence:
  2190. --m_position;
  2191. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2192. fail(regex_constants::error_perl_extension, m_position - m_base);
  2193. return false;
  2194. }
  2195. if(*m_position == charT('R'))
  2196. {
  2197. if(++m_position == m_end)
  2198. {
  2199. // Rewind to start of (? sequence:
  2200. --m_position;
  2201. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2202. fail(regex_constants::error_perl_extension, m_position - m_base);
  2203. return false;
  2204. }
  2205. if(*m_position == charT('&'))
  2206. {
  2207. const charT* base = ++m_position;
  2208. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2209. ++m_position;
  2210. if(m_position == m_end)
  2211. {
  2212. // Rewind to start of (? sequence:
  2213. --m_position;
  2214. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2215. fail(regex_constants::error_perl_extension, m_position - m_base);
  2216. return false;
  2217. }
  2218. v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
  2219. }
  2220. else
  2221. {
  2222. v = -this->m_traits.toi(m_position, m_end, 10);
  2223. }
  2224. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2225. br->index = v < 0 ? (v - 1) : 0;
  2226. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2227. {
  2228. // Rewind to start of (? sequence:
  2229. --m_position;
  2230. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2231. fail(regex_constants::error_perl_extension, m_position - m_base);
  2232. return false;
  2233. }
  2234. if(++m_position == m_end)
  2235. {
  2236. // Rewind to start of (? sequence:
  2237. --m_position;
  2238. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2239. fail(regex_constants::error_perl_extension, m_position - m_base);
  2240. return false;
  2241. }
  2242. }
  2243. else if((*m_position == charT('\'')) || (*m_position == charT('<')))
  2244. {
  2245. const charT* base = ++m_position;
  2246. while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
  2247. ++m_position;
  2248. if(m_position == m_end)
  2249. {
  2250. // Rewind to start of (? sequence:
  2251. --m_position;
  2252. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2253. fail(regex_constants::error_perl_extension, m_position - m_base);
  2254. return false;
  2255. }
  2256. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2257. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2258. br->index = v;
  2259. if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
  2260. {
  2261. // Rewind to start of (? sequence:
  2262. --m_position;
  2263. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2264. fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
  2265. return false;
  2266. }
  2267. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2268. {
  2269. // Rewind to start of (? sequence:
  2270. --m_position;
  2271. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2272. fail(regex_constants::error_perl_extension, m_position - m_base);
  2273. return false;
  2274. }
  2275. if(++m_position == m_end)
  2276. {
  2277. // Rewind to start of (? sequence:
  2278. --m_position;
  2279. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2280. fail(regex_constants::error_perl_extension, m_position - m_base);
  2281. return false;
  2282. }
  2283. }
  2284. else if(*m_position == charT('D'))
  2285. {
  2286. const char* def = "DEFINE";
  2287. while(*def && (m_position != m_end) && (*m_position == charT(*def)))
  2288. ++m_position, ++def;
  2289. if((m_position == m_end) || *def)
  2290. {
  2291. // Rewind to start of (? sequence:
  2292. --m_position;
  2293. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2294. fail(regex_constants::error_perl_extension, m_position - m_base);
  2295. return false;
  2296. }
  2297. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2298. br->index = 9999; // special magic value!
  2299. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2300. {
  2301. // Rewind to start of (? sequence:
  2302. --m_position;
  2303. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2304. fail(regex_constants::error_perl_extension, m_position - m_base);
  2305. return false;
  2306. }
  2307. if(++m_position == m_end)
  2308. {
  2309. // Rewind to start of (? sequence:
  2310. --m_position;
  2311. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2312. fail(regex_constants::error_perl_extension, m_position - m_base);
  2313. return false;
  2314. }
  2315. }
  2316. else if(v > 0)
  2317. {
  2318. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2319. br->index = v;
  2320. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2321. {
  2322. // Rewind to start of (? sequence:
  2323. --m_position;
  2324. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2325. fail(regex_constants::error_perl_extension, m_position - m_base);
  2326. return false;
  2327. }
  2328. if(++m_position == m_end)
  2329. {
  2330. // Rewind to start of (? sequence:
  2331. --m_position;
  2332. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2333. fail(regex_constants::error_perl_extension, m_position - m_base);
  2334. return false;
  2335. }
  2336. }
  2337. else
  2338. {
  2339. // verify that we have a lookahead or lookbehind assert:
  2340. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
  2341. {
  2342. // Rewind to start of (? sequence:
  2343. --m_position;
  2344. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2345. fail(regex_constants::error_perl_extension, m_position - m_base);
  2346. return false;
  2347. }
  2348. if(++m_position == m_end)
  2349. {
  2350. // Rewind to start of (? sequence:
  2351. --m_position;
  2352. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2353. fail(regex_constants::error_perl_extension, m_position - m_base);
  2354. return false;
  2355. }
  2356. if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
  2357. {
  2358. if(++m_position == m_end)
  2359. {
  2360. // Rewind to start of (? sequence:
  2361. --m_position;
  2362. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2363. fail(regex_constants::error_perl_extension, m_position - m_base);
  2364. return false;
  2365. }
  2366. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2367. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2368. {
  2369. // Rewind to start of (? sequence:
  2370. --m_position;
  2371. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2372. fail(regex_constants::error_perl_extension, m_position - m_base);
  2373. return false;
  2374. }
  2375. m_position -= 3;
  2376. }
  2377. else
  2378. {
  2379. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2380. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2381. {
  2382. // Rewind to start of (? sequence:
  2383. --m_position;
  2384. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2385. fail(regex_constants::error_perl_extension, m_position - m_base);
  2386. return false;
  2387. }
  2388. m_position -= 2;
  2389. }
  2390. }
  2391. break;
  2392. }
  2393. case regex_constants::syntax_close_mark:
  2394. // Rewind to start of (? sequence:
  2395. --m_position;
  2396. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2397. fail(regex_constants::error_perl_extension, m_position - m_base);
  2398. return false;
  2399. case regex_constants::escape_type_end_buffer:
  2400. {
  2401. name_delim = *m_position;
  2402. named_capture_jump:
  2403. markid = 0;
  2404. if(0 == (this->flags() & regbase::nosubs))
  2405. {
  2406. markid = ++m_mark_count;
  2407. #ifndef BOOST_NO_STD_DISTANCE
  2408. if(this->flags() & regbase::save_subexpression_location)
  2409. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
  2410. #else
  2411. if(this->flags() & regbase::save_subexpression_location)
  2412. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
  2413. #endif
  2414. }
  2415. pb->index = markid;
  2416. const charT* base = ++m_position;
  2417. if(m_position == m_end)
  2418. {
  2419. // Rewind to start of (? sequence:
  2420. --m_position;
  2421. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2422. fail(regex_constants::error_perl_extension, m_position - m_base);
  2423. return false;
  2424. }
  2425. while((m_position != m_end) && (*m_position != name_delim))
  2426. ++m_position;
  2427. if(m_position == m_end)
  2428. {
  2429. // Rewind to start of (? sequence:
  2430. --m_position;
  2431. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2432. fail(regex_constants::error_perl_extension, m_position - m_base);
  2433. return false;
  2434. }
  2435. this->m_pdata->set_name(base, m_position, markid);
  2436. ++m_position;
  2437. break;
  2438. }
  2439. default:
  2440. if(*m_position == charT('R'))
  2441. {
  2442. ++m_position;
  2443. v = 0;
  2444. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2445. {
  2446. // Rewind to start of (? sequence:
  2447. --m_position;
  2448. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2449. fail(regex_constants::error_perl_extension, m_position - m_base);
  2450. return false;
  2451. }
  2452. goto insert_recursion;
  2453. }
  2454. if(*m_position == charT('&'))
  2455. {
  2456. ++m_position;
  2457. const charT* base = m_position;
  2458. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2459. ++m_position;
  2460. if(m_position == m_end)
  2461. {
  2462. // Rewind to start of (? sequence:
  2463. --m_position;
  2464. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2465. fail(regex_constants::error_perl_extension, m_position - m_base);
  2466. return false;
  2467. }
  2468. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2469. goto insert_recursion;
  2470. }
  2471. if(*m_position == charT('P'))
  2472. {
  2473. ++m_position;
  2474. if(m_position == m_end)
  2475. {
  2476. // Rewind to start of (? sequence:
  2477. --m_position;
  2478. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2479. fail(regex_constants::error_perl_extension, m_position - m_base);
  2480. return false;
  2481. }
  2482. if(*m_position == charT('>'))
  2483. {
  2484. ++m_position;
  2485. const charT* base = m_position;
  2486. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2487. ++m_position;
  2488. if(m_position == m_end)
  2489. {
  2490. // Rewind to start of (? sequence:
  2491. --m_position;
  2492. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2493. fail(regex_constants::error_perl_extension, m_position - m_base);
  2494. return false;
  2495. }
  2496. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2497. goto insert_recursion;
  2498. }
  2499. }
  2500. //
  2501. // lets assume that we have a (?imsx) group and try and parse it:
  2502. //
  2503. option_group_jump:
  2504. regex_constants::syntax_option_type opts = parse_options();
  2505. if(m_position == m_end)
  2506. {
  2507. // Rewind to start of (? sequence:
  2508. --m_position;
  2509. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2510. fail(regex_constants::error_perl_extension, m_position - m_base);
  2511. return false;
  2512. }
  2513. // make a note of whether we have a case change:
  2514. m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
  2515. pb->index = markid = 0;
  2516. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
  2517. {
  2518. // update flags and carry on as normal:
  2519. this->flags(opts);
  2520. restore_flags = false;
  2521. old_case_change |= m_has_case_change; // defer end of scope by one ')'
  2522. }
  2523. else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
  2524. {
  2525. // update flags and carry on until the matching ')' is found:
  2526. this->flags(opts);
  2527. ++m_position;
  2528. }
  2529. else
  2530. {
  2531. // Rewind to start of (? sequence:
  2532. --m_position;
  2533. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2534. fail(regex_constants::error_perl_extension, m_position - m_base);
  2535. return false;
  2536. }
  2537. // finally append a case change state if we need it:
  2538. if(m_has_case_change)
  2539. {
  2540. static_cast<re_case*>(
  2541. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2542. )->icase = opts & regbase::icase;
  2543. }
  2544. }
  2545. //
  2546. // now recursively add more states, this will terminate when we get to a
  2547. // matching ')' :
  2548. //
  2549. parse_all();
  2550. //
  2551. // Unwind alternatives:
  2552. //
  2553. if(0 == unwind_alts(last_paren_start))
  2554. {
  2555. // Rewind to start of (? sequence:
  2556. --m_position;
  2557. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2558. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
  2559. return false;
  2560. }
  2561. //
  2562. // we either have a ')' or we have run out of characters prematurely:
  2563. //
  2564. if(m_position == m_end)
  2565. {
  2566. // Rewind to start of (? sequence:
  2567. --m_position;
  2568. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2569. this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
  2570. return false;
  2571. }
  2572. BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
  2573. ++m_position;
  2574. //
  2575. // restore the flags:
  2576. //
  2577. if(restore_flags)
  2578. {
  2579. // append a case change state if we need it:
  2580. if(m_has_case_change)
  2581. {
  2582. static_cast<re_case*>(
  2583. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2584. )->icase = old_flags & regbase::icase;
  2585. }
  2586. this->flags(old_flags);
  2587. }
  2588. //
  2589. // set up the jump pointer if we have one:
  2590. //
  2591. if(jump_offset)
  2592. {
  2593. this->m_pdata->m_data.align();
  2594. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  2595. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  2596. if((this->m_last_state == jmp) && (markid != -2))
  2597. {
  2598. // Oops... we didn't have anything inside the assertion.
  2599. // Note we don't get here for negated forward lookahead as (?!)
  2600. // does have some uses.
  2601. // Rewind to start of (? sequence:
  2602. --m_position;
  2603. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2604. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
  2605. return false;
  2606. }
  2607. }
  2608. //
  2609. // verify that if this is conditional expression, that we do have
  2610. // an alternative, if not add one:
  2611. //
  2612. if(markid == -4)
  2613. {
  2614. re_syntax_base* b = this->getaddress(expected_alt_point);
  2615. // Make sure we have exactly one alternative following this state:
  2616. if(b->type != syntax_element_alt)
  2617. {
  2618. re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
  2619. alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
  2620. }
  2621. else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
  2622. {
  2623. // Can't have seen more than one alternative:
  2624. // Rewind to start of (? sequence:
  2625. --m_position;
  2626. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2627. fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
  2628. return false;
  2629. }
  2630. else
  2631. {
  2632. // We must *not* have seen an alternative inside a (DEFINE) block:
  2633. b = this->getaddress(b->next.i, b);
  2634. if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
  2635. {
  2636. // Rewind to start of (? sequence:
  2637. --m_position;
  2638. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2639. fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
  2640. return false;
  2641. }
  2642. }
  2643. // check for invalid repetition of next state:
  2644. b = this->getaddress(expected_alt_point);
  2645. b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
  2646. if((b->type != syntax_element_assert_backref)
  2647. && (b->type != syntax_element_startmark))
  2648. {
  2649. // Rewind to start of (? sequence:
  2650. --m_position;
  2651. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2652. fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
  2653. return false;
  2654. }
  2655. }
  2656. //
  2657. // append closing parenthesis state:
  2658. //
  2659. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  2660. pb->index = markid;
  2661. pb->icase = this->flags() & regbase::icase;
  2662. this->m_paren_start = last_paren_start;
  2663. //
  2664. // restore the alternate insertion point:
  2665. //
  2666. this->m_alt_insert_point = last_alt_point;
  2667. //
  2668. // and the case change data:
  2669. //
  2670. m_has_case_change = old_case_change;
  2671. //
  2672. // And the mark_reset data:
  2673. //
  2674. if(m_max_mark > m_mark_count)
  2675. {
  2676. m_mark_count = m_max_mark;
  2677. }
  2678. m_mark_reset = mark_reset;
  2679. m_max_mark = max_mark;
  2680. if(markid > 0)
  2681. {
  2682. #ifndef BOOST_NO_STD_DISTANCE
  2683. if(this->flags() & regbase::save_subexpression_location)
  2684. this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
  2685. #else
  2686. if(this->flags() & regbase::save_subexpression_location)
  2687. this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
  2688. #endif
  2689. //
  2690. // allow backrefs to this mark:
  2691. //
  2692. this->m_backrefs.set(markid);
  2693. }
  2694. return true;
  2695. }
  2696. template <class charT, class traits>
  2697. bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
  2698. {
  2699. while(*verb)
  2700. {
  2701. if(static_cast<charT>(*verb) != *m_position)
  2702. {
  2703. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2704. fail(regex_constants::error_perl_extension, m_position - m_base);
  2705. return false;
  2706. }
  2707. if(++m_position == m_end)
  2708. {
  2709. --m_position;
  2710. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2711. fail(regex_constants::error_perl_extension, m_position - m_base);
  2712. return false;
  2713. }
  2714. ++verb;
  2715. }
  2716. return true;
  2717. }
  2718. #ifdef BOOST_MSVC
  2719. # pragma warning(push)
  2720. #if BOOST_MSVC >= 1800
  2721. #pragma warning(disable:26812)
  2722. #endif
  2723. #endif
  2724. template <class charT, class traits>
  2725. bool basic_regex_parser<charT, traits>::parse_perl_verb()
  2726. {
  2727. if(++m_position == m_end)
  2728. {
  2729. // Rewind to start of (* sequence:
  2730. --m_position;
  2731. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2732. fail(regex_constants::error_perl_extension, m_position - m_base);
  2733. return false;
  2734. }
  2735. switch(*m_position)
  2736. {
  2737. case 'F':
  2738. if(++m_position == m_end)
  2739. {
  2740. // Rewind to start of (* sequence:
  2741. --m_position;
  2742. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2743. fail(regex_constants::error_perl_extension, m_position - m_base);
  2744. return false;
  2745. }
  2746. if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
  2747. {
  2748. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2749. {
  2750. // Rewind to start of (* sequence:
  2751. --m_position;
  2752. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2753. fail(regex_constants::error_perl_extension, m_position - m_base);
  2754. return false;
  2755. }
  2756. ++m_position;
  2757. this->append_state(syntax_element_fail);
  2758. return true;
  2759. }
  2760. break;
  2761. case 'A':
  2762. if(++m_position == m_end)
  2763. {
  2764. // Rewind to start of (* sequence:
  2765. --m_position;
  2766. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2767. fail(regex_constants::error_perl_extension, m_position - m_base);
  2768. return false;
  2769. }
  2770. if(match_verb("CCEPT"))
  2771. {
  2772. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2773. {
  2774. // Rewind to start of (* sequence:
  2775. --m_position;
  2776. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2777. fail(regex_constants::error_perl_extension, m_position - m_base);
  2778. return false;
  2779. }
  2780. ++m_position;
  2781. this->append_state(syntax_element_accept);
  2782. return true;
  2783. }
  2784. break;
  2785. case 'C':
  2786. if(++m_position == m_end)
  2787. {
  2788. // Rewind to start of (* sequence:
  2789. --m_position;
  2790. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2791. fail(regex_constants::error_perl_extension, m_position - m_base);
  2792. return false;
  2793. }
  2794. if(match_verb("OMMIT"))
  2795. {
  2796. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2797. {
  2798. // Rewind to start of (* sequence:
  2799. --m_position;
  2800. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2801. fail(regex_constants::error_perl_extension, m_position - m_base);
  2802. return false;
  2803. }
  2804. ++m_position;
  2805. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
  2806. this->m_pdata->m_disable_match_any = true;
  2807. return true;
  2808. }
  2809. break;
  2810. case 'P':
  2811. if(++m_position == m_end)
  2812. {
  2813. // Rewind to start of (* sequence:
  2814. --m_position;
  2815. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2816. fail(regex_constants::error_perl_extension, m_position - m_base);
  2817. return false;
  2818. }
  2819. if(match_verb("RUNE"))
  2820. {
  2821. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2822. {
  2823. // Rewind to start of (* sequence:
  2824. --m_position;
  2825. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2826. fail(regex_constants::error_perl_extension, m_position - m_base);
  2827. return false;
  2828. }
  2829. ++m_position;
  2830. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
  2831. this->m_pdata->m_disable_match_any = true;
  2832. return true;
  2833. }
  2834. break;
  2835. case 'S':
  2836. if(++m_position == m_end)
  2837. {
  2838. // Rewind to start of (* sequence:
  2839. --m_position;
  2840. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2841. fail(regex_constants::error_perl_extension, m_position - m_base);
  2842. return false;
  2843. }
  2844. if(match_verb("KIP"))
  2845. {
  2846. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2847. {
  2848. // Rewind to start of (* sequence:
  2849. --m_position;
  2850. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2851. fail(regex_constants::error_perl_extension, m_position - m_base);
  2852. return false;
  2853. }
  2854. ++m_position;
  2855. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
  2856. this->m_pdata->m_disable_match_any = true;
  2857. return true;
  2858. }
  2859. break;
  2860. case 'T':
  2861. if(++m_position == m_end)
  2862. {
  2863. // Rewind to start of (* sequence:
  2864. --m_position;
  2865. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2866. fail(regex_constants::error_perl_extension, m_position - m_base);
  2867. return false;
  2868. }
  2869. if(match_verb("HEN"))
  2870. {
  2871. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2872. {
  2873. // Rewind to start of (* sequence:
  2874. --m_position;
  2875. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2876. fail(regex_constants::error_perl_extension, m_position - m_base);
  2877. return false;
  2878. }
  2879. ++m_position;
  2880. this->append_state(syntax_element_then);
  2881. this->m_pdata->m_disable_match_any = true;
  2882. return true;
  2883. }
  2884. break;
  2885. }
  2886. // Rewind to start of (* sequence:
  2887. --m_position;
  2888. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2889. fail(regex_constants::error_perl_extension, m_position - m_base);
  2890. return false;
  2891. }
  2892. #ifdef BOOST_MSVC
  2893. # pragma warning(pop)
  2894. #endif
  2895. template <class charT, class traits>
  2896. bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
  2897. {
  2898. //
  2899. // parses an emacs style \sx or \Sx construct.
  2900. //
  2901. if(++m_position == m_end)
  2902. {
  2903. // Rewind to start of sequence:
  2904. --m_position;
  2905. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  2906. fail(regex_constants::error_escape, m_position - m_base);
  2907. return false;
  2908. }
  2909. basic_char_set<charT, traits> char_set;
  2910. if(negate)
  2911. char_set.negate();
  2912. static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
  2913. switch(*m_position)
  2914. {
  2915. case 's':
  2916. case ' ':
  2917. char_set.add_class(this->m_mask_space);
  2918. break;
  2919. case 'w':
  2920. char_set.add_class(this->m_word_mask);
  2921. break;
  2922. case '_':
  2923. char_set.add_single(digraph<charT>(charT('$')));
  2924. char_set.add_single(digraph<charT>(charT('&')));
  2925. char_set.add_single(digraph<charT>(charT('*')));
  2926. char_set.add_single(digraph<charT>(charT('+')));
  2927. char_set.add_single(digraph<charT>(charT('-')));
  2928. char_set.add_single(digraph<charT>(charT('_')));
  2929. char_set.add_single(digraph<charT>(charT('<')));
  2930. char_set.add_single(digraph<charT>(charT('>')));
  2931. break;
  2932. case '.':
  2933. char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
  2934. break;
  2935. case '(':
  2936. char_set.add_single(digraph<charT>(charT('(')));
  2937. char_set.add_single(digraph<charT>(charT('[')));
  2938. char_set.add_single(digraph<charT>(charT('{')));
  2939. break;
  2940. case ')':
  2941. char_set.add_single(digraph<charT>(charT(')')));
  2942. char_set.add_single(digraph<charT>(charT(']')));
  2943. char_set.add_single(digraph<charT>(charT('}')));
  2944. break;
  2945. case '"':
  2946. char_set.add_single(digraph<charT>(charT('"')));
  2947. char_set.add_single(digraph<charT>(charT('\'')));
  2948. char_set.add_single(digraph<charT>(charT('`')));
  2949. break;
  2950. case '\'':
  2951. char_set.add_single(digraph<charT>(charT('\'')));
  2952. char_set.add_single(digraph<charT>(charT(',')));
  2953. char_set.add_single(digraph<charT>(charT('#')));
  2954. break;
  2955. case '<':
  2956. char_set.add_single(digraph<charT>(charT(';')));
  2957. break;
  2958. case '>':
  2959. char_set.add_single(digraph<charT>(charT('\n')));
  2960. char_set.add_single(digraph<charT>(charT('\f')));
  2961. break;
  2962. default:
  2963. fail(regex_constants::error_ctype, m_position - m_base);
  2964. return false;
  2965. }
  2966. if(0 == this->append_set(char_set))
  2967. {
  2968. fail(regex_constants::error_ctype, m_position - m_base);
  2969. return false;
  2970. }
  2971. ++m_position;
  2972. return true;
  2973. }
  2974. template <class charT, class traits>
  2975. regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
  2976. {
  2977. // we have a (?imsx-imsx) group, convert it into a set of flags:
  2978. regex_constants::syntax_option_type f = this->flags();
  2979. bool breakout = false;
  2980. do
  2981. {
  2982. switch(*m_position)
  2983. {
  2984. case 's':
  2985. f |= regex_constants::mod_s;
  2986. f &= ~regex_constants::no_mod_s;
  2987. break;
  2988. case 'm':
  2989. f &= ~regex_constants::no_mod_m;
  2990. break;
  2991. case 'i':
  2992. f |= regex_constants::icase;
  2993. break;
  2994. case 'x':
  2995. f |= regex_constants::mod_x;
  2996. break;
  2997. default:
  2998. breakout = true;
  2999. continue;
  3000. }
  3001. if(++m_position == m_end)
  3002. {
  3003. // Rewind to start of (? sequence:
  3004. --m_position;
  3005. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  3006. fail(regex_constants::error_paren, m_position - m_base);
  3007. return false;
  3008. }
  3009. }
  3010. while(!breakout);
  3011. breakout = false;
  3012. if(*m_position == static_cast<charT>('-'))
  3013. {
  3014. if(++m_position == m_end)
  3015. {
  3016. // Rewind to start of (? sequence:
  3017. --m_position;
  3018. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  3019. fail(regex_constants::error_paren, m_position - m_base);
  3020. return false;
  3021. }
  3022. do
  3023. {
  3024. switch(*m_position)
  3025. {
  3026. case 's':
  3027. f &= ~regex_constants::mod_s;
  3028. f |= regex_constants::no_mod_s;
  3029. break;
  3030. case 'm':
  3031. f |= regex_constants::no_mod_m;
  3032. break;
  3033. case 'i':
  3034. f &= ~regex_constants::icase;
  3035. break;
  3036. case 'x':
  3037. f &= ~regex_constants::mod_x;
  3038. break;
  3039. default:
  3040. breakout = true;
  3041. continue;
  3042. }
  3043. if(++m_position == m_end)
  3044. {
  3045. // Rewind to start of (? sequence:
  3046. --m_position;
  3047. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  3048. fail(regex_constants::error_paren, m_position - m_base);
  3049. return false;
  3050. }
  3051. }
  3052. while(!breakout);
  3053. }
  3054. return f;
  3055. }
  3056. template <class charT, class traits>
  3057. bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
  3058. {
  3059. //
  3060. // If we didn't actually add any states after the last
  3061. // alternative then that's an error:
  3062. //
  3063. if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
  3064. && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)
  3065. &&
  3066. !(
  3067. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  3068. &&
  3069. ((this->flags() & regbase::no_empty_expressions) == 0)
  3070. )
  3071. )
  3072. {
  3073. fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
  3074. return false;
  3075. }
  3076. //
  3077. // Fix up our alternatives:
  3078. //
  3079. while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start))
  3080. {
  3081. //
  3082. // fix up the jump to point to the end of the states
  3083. // that we've just added:
  3084. //
  3085. std::ptrdiff_t jump_offset = m_alt_jumps.back();
  3086. m_alt_jumps.pop_back();
  3087. this->m_pdata->m_data.align();
  3088. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  3089. if (jmp->type != syntax_element_jump)
  3090. {
  3091. // Something really bad happened, this used to be an assert,
  3092. // but we'll make it an error just in case we should ever get here.
  3093. fail(regex_constants::error_unknown, this->m_position - this->m_base, "Internal logic failed while compiling the expression, probably you added a repeat to something non-repeatable!");
  3094. return false;
  3095. }
  3096. jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
  3097. }
  3098. return true;
  3099. }
  3100. #ifdef BOOST_MSVC
  3101. #pragma warning(pop)
  3102. #endif
  3103. } // namespace BOOST_REGEX_DETAIL_NS
  3104. } // namespace boost
  3105. #ifdef BOOST_MSVC
  3106. #pragma warning(push)
  3107. #pragma warning(disable: 4103)
  3108. #endif
  3109. #ifdef BOOST_HAS_ABI_HEADERS
  3110. # include BOOST_ABI_SUFFIX
  3111. #endif
  3112. #ifdef BOOST_MSVC
  3113. #pragma warning(pop)
  3114. #endif
  3115. #endif