basic_regex_parser.hpp 110 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE basic_regex_parser.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares template class basic_regex_parser.
  16. */
  17. #ifndef BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP
  18. #define BOOST_REGEX_V5_BASIC_REGEX_PARSER_HPP
  19. namespace boost{
  20. namespace BOOST_REGEX_DETAIL_NS{
  21. #ifdef BOOST_REGEX_MSVC
  22. #pragma warning(push)
  23. #pragma warning(disable:4244 4459)
  24. #if BOOST_REGEX_MSVC < 1910
  25. #pragma warning(disable:4800)
  26. #endif
  27. #endif
  //
  // umax: the largest value the parser will use for an "unbounded" count,
  // expressed as a std::intmax_t.  The tag argument selects the source of
  // the limit.
  //
  // Tag == false: use the maximum of std::intmax_t itself.
  inline std::intmax_t umax(const std::false_type&)
  {
     // Get out clause: fall back to INT_MAX if numeric_limits has no
     // specialization for std::intmax_t.
     if(std::numeric_limits<std::intmax_t>::is_specialized)
        return (std::numeric_limits<std::intmax_t>::max)();
     return INT_MAX;
  }
  // Tag == true: use the maximum of std::size_t.
  inline std::intmax_t umax(const std::true_type&)
  {
     return static_cast<std::intmax_t>((std::numeric_limits<std::size_t>::max)());
  }
  // Dispatcher: picks the std::size_t limit only when std::intmax_t has at
  // least as many value bits as std::size_t, otherwise the std::intmax_t limit.
  inline std::intmax_t umax()
  {
     typedef std::integral_constant<
        bool,
        std::numeric_limits<std::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits
     > size_t_fits_tag;
     return umax(size_t_fits_tag());
  }
  41. template <class charT, class traits>
  42. class basic_regex_parser : public basic_regex_creator<charT, traits>
  43. {
  44. public:
  45. basic_regex_parser(regex_data<charT, traits>* data);
  46. void parse(const charT* p1, const charT* p2, unsigned flags);
  47. void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
  48. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
  49. void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
  50. {
  51. fail(error_code, position, message, position);
  52. }
  53. bool parse_all();
  54. bool parse_basic();
  55. bool parse_extended();
  56. bool parse_literal();
  57. bool parse_open_paren();
  58. bool parse_basic_escape();
  59. bool parse_extended_escape();
  60. bool parse_match_any();
  61. bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
  62. bool parse_repeat_range(bool isbasic);
  63. bool parse_alt();
  64. bool parse_set();
  65. bool parse_backref();
  66. void parse_set_literal(basic_char_set<charT, traits>& char_set);
  67. bool parse_inner_set(basic_char_set<charT, traits>& char_set);
  68. bool parse_QE();
  69. bool parse_perl_extension();
  70. bool parse_perl_verb();
  71. bool match_verb(const char*);
  72. bool add_emacs_code(bool negate);
  73. bool unwind_alts(std::ptrdiff_t last_paren_start);
  74. digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
  75. charT unescape_character();
  76. regex_constants::syntax_option_type parse_options();
  77. private:
  78. typedef bool (basic_regex_parser::*parser_proc_type)();
  79. typedef typename traits::string_type string_type;
  80. typedef typename traits::char_class_type char_class_type;
  81. parser_proc_type m_parser_proc; // the main parser to use
  82. const charT* m_base; // the start of the string being parsed
  83. const charT* m_end; // the end of the string being parsed
  84. const charT* m_position; // our current parser position
  85. unsigned m_mark_count; // how many sub-expressions we have
  86. int m_mark_reset; // used to indicate that we're inside a (?|...) block.
  87. unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
  88. std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
  89. std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
  90. bool m_has_case_change; // true if somewhere in the current block the case has changed
  91. unsigned m_recursion_count; // How many times we've called parse_all.
  92. unsigned m_max_backref; // Largest index of any backref.
  93. #if defined(BOOST_REGEX_MSVC) && defined(_M_IX86)
  94. // This is an ugly warning suppression workaround (for warnings *inside* std::vector
  95. // that can not otherwise be suppressed)...
  96. static_assert(sizeof(long) >= sizeof(void*), "Long isn't long enough!");
  97. std::vector<long> m_alt_jumps; // list of alternative in the current scope.
  98. #else
  99. std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
  100. #endif
  101. basic_regex_parser& operator=(const basic_regex_parser&);
  102. basic_regex_parser(const basic_regex_parser&);
  103. };
  104. template <class charT, class traits>
  105. basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
  106. : basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0),
  107. m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0), m_max_backref(0)
  108. {
  109. }
  110. template <class charT, class traits>
  111. void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
  112. {
  113. // pass l_flags on to base class:
  114. this->init(l_flags);
  115. // set up pointers:
  116. m_position = m_base = p1;
  117. m_end = p2;
  118. // empty strings are errors:
  119. if((p1 == p2) &&
  120. (
  121. ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
  122. || (l_flags & regbase::no_empty_expressions)
  123. )
  124. )
  125. {
  126. fail(regex_constants::error_empty, 0);
  127. return;
  128. }
  129. // select which parser to use:
  130. switch(l_flags & regbase::main_option_type)
  131. {
  132. case regbase::perl_syntax_group:
  133. {
  134. m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
  135. //
  136. // Add a leading paren with index zero to give recursions a target:
  137. //
  138. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  139. br->index = 0;
  140. br->icase = this->flags() & regbase::icase;
  141. break;
  142. }
  143. case regbase::basic_syntax_group:
  144. m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
  145. break;
  146. case regbase::literal:
  147. m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
  148. break;
  149. default:
  150. // Oops, someone has managed to set more than one of the main option flags,
  151. // so this must be an error:
  152. fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
  153. return;
  154. }
  155. // parse all our characters:
  156. bool result = parse_all();
  157. //
  158. // Unwind our alternatives:
  159. //
  160. unwind_alts(-1);
  161. // reset l_flags as a global scope (?imsx) may have altered them:
  162. this->flags(l_flags);
  163. // if we haven't gobbled up all the characters then we must
  164. // have had an unexpected ')' :
  165. if(!result)
  166. {
  167. fail(regex_constants::error_paren, std::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
  168. return;
  169. }
  170. // if an error has been set then give up now:
  171. if(this->m_pdata->m_status)
  172. return;
  173. // fill in our sub-expression count:
  174. this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
  175. //
  176. // Check we don't have backreferences to sub-expressions which don't exist:
  177. //
  178. if (m_max_backref > m_mark_count)
  179. {
  180. fail(regex_constants::error_backref, std::distance(m_base, m_position), "Found a backreference to a non-existant sub-expression.");
  181. }
  182. this->finalize(p1, p2);
  183. }
  184. template <class charT, class traits>
  185. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
  186. {
  187. // get the error message:
  188. std::string message = this->m_pdata->m_ptraits->error_string(error_code);
  189. fail(error_code, position, message);
  190. }
  191. template <class charT, class traits>
  192. void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
  193. {
  194. if(0 == this->m_pdata->m_status) // update the error code if not already set
  195. this->m_pdata->m_status = error_code;
  196. m_position = m_end; // don't bother parsing anything else
  197. //
  198. // Augment error message with the regular expression text:
  199. //
  200. if(start_pos == position)
  201. start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
  202. std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
  203. if(error_code != regex_constants::error_empty)
  204. {
  205. if((start_pos != 0) || (end_pos != (m_end - m_base)))
  206. message += " The error occurred while parsing the regular expression fragment: '";
  207. else
  208. message += " The error occurred while parsing the regular expression: '";
  209. if(start_pos != end_pos)
  210. {
  211. message += std::string(m_base + start_pos, m_base + position);
  212. message += ">>>HERE>>>";
  213. message += std::string(m_base + position, m_base + end_pos);
  214. }
  215. message += "'.";
  216. }
  217. #ifndef BOOST_NO_EXCEPTIONS
  218. if(0 == (this->flags() & regex_constants::no_except))
  219. {
  220. boost::regex_error e(message, error_code, position);
  221. e.raise();
  222. }
  223. #else
  224. (void)position; // suppress warnings.
  225. #endif
  226. }
  227. template <class charT, class traits>
  228. bool basic_regex_parser<charT, traits>::parse_all()
  229. {
  230. if (++m_recursion_count > 400)
  231. {
  232. // exceeded internal limits
  233. fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
  234. }
  235. bool result = true;
  236. while(result && (m_position != m_end))
  237. {
  238. result = (this->*m_parser_proc)();
  239. }
  240. --m_recursion_count;
  241. return result;
  242. }
  243. #ifdef BOOST_REGEX_MSVC
  244. #pragma warning(push)
  245. #pragma warning(disable:4702)
  246. #endif
  247. template <class charT, class traits>
  248. bool basic_regex_parser<charT, traits>::parse_basic()
  249. {
  250. switch(this->m_traits.syntax_type(*m_position))
  251. {
  252. case regex_constants::syntax_escape:
  253. return parse_basic_escape();
  254. case regex_constants::syntax_dot:
  255. return parse_match_any();
  256. case regex_constants::syntax_caret:
  257. ++m_position;
  258. this->append_state(syntax_element_start_line);
  259. break;
  260. case regex_constants::syntax_dollar:
  261. ++m_position;
  262. this->append_state(syntax_element_end_line);
  263. break;
  264. case regex_constants::syntax_star:
  265. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
  266. return parse_literal();
  267. else
  268. {
  269. ++m_position;
  270. return parse_repeat();
  271. }
  272. case regex_constants::syntax_plus:
  273. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  274. return parse_literal();
  275. else
  276. {
  277. ++m_position;
  278. return parse_repeat(1);
  279. }
  280. case regex_constants::syntax_question:
  281. if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
  282. return parse_literal();
  283. else
  284. {
  285. ++m_position;
  286. return parse_repeat(0, 1);
  287. }
  288. case regex_constants::syntax_open_set:
  289. return parse_set();
  290. case regex_constants::syntax_newline:
  291. if(this->flags() & regbase::newline_alt)
  292. return parse_alt();
  293. else
  294. return parse_literal();
  295. default:
  296. return parse_literal();
  297. }
  298. return true;
  299. }
  300. #ifdef BOOST_REGEX_MSVC
  301. # pragma warning(push)
  302. #if BOOST_REGEX_MSVC >= 1800
  303. #pragma warning(disable:26812)
  304. #endif
  305. #endif
  306. template <class charT, class traits>
  307. bool basic_regex_parser<charT, traits>::parse_extended()
  308. {
  309. bool result = true;
  310. switch(this->m_traits.syntax_type(*m_position))
  311. {
  312. case regex_constants::syntax_open_mark:
  313. return parse_open_paren();
  314. case regex_constants::syntax_close_mark:
  315. return false;
  316. case regex_constants::syntax_escape:
  317. return parse_extended_escape();
  318. case regex_constants::syntax_dot:
  319. return parse_match_any();
  320. case regex_constants::syntax_caret:
  321. ++m_position;
  322. this->append_state(
  323. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
  324. break;
  325. case regex_constants::syntax_dollar:
  326. ++m_position;
  327. this->append_state(
  328. (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
  329. break;
  330. case regex_constants::syntax_star:
  331. if(m_position == this->m_base)
  332. {
  333. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
  334. return false;
  335. }
  336. ++m_position;
  337. return parse_repeat();
  338. case regex_constants::syntax_question:
  339. if(m_position == this->m_base)
  340. {
  341. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
  342. return false;
  343. }
  344. ++m_position;
  345. return parse_repeat(0,1);
  346. case regex_constants::syntax_plus:
  347. if(m_position == this->m_base)
  348. {
  349. fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
  350. return false;
  351. }
  352. ++m_position;
  353. return parse_repeat(1);
  354. case regex_constants::syntax_open_brace:
  355. ++m_position;
  356. return parse_repeat_range(false);
  357. case regex_constants::syntax_close_brace:
  358. if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
  359. {
  360. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  361. return false;
  362. }
  363. result = parse_literal();
  364. break;
  365. case regex_constants::syntax_or:
  366. return parse_alt();
  367. case regex_constants::syntax_open_set:
  368. return parse_set();
  369. case regex_constants::syntax_newline:
  370. if(this->flags() & regbase::newline_alt)
  371. return parse_alt();
  372. else
  373. return parse_literal();
  374. case regex_constants::syntax_hash:
  375. //
  376. // If we have a mod_x flag set, then skip until
  377. // we get to a newline character:
  378. //
  379. if((this->flags()
  380. & (regbase::no_perl_ex|regbase::mod_x))
  381. == regbase::mod_x)
  382. {
  383. while((m_position != m_end) && !is_separator(*m_position++)){}
  384. return true;
  385. }
  386. BOOST_REGEX_FALLTHROUGH;
  387. default:
  388. result = parse_literal();
  389. break;
  390. }
  391. return result;
  392. }
  393. #ifdef BOOST_REGEX_MSVC
  394. # pragma warning(pop)
  395. #endif
  396. #ifdef BOOST_REGEX_MSVC
  397. #pragma warning(pop)
  398. #endif
  399. template <class charT, class traits>
  400. bool basic_regex_parser<charT, traits>::parse_literal()
  401. {
  402. // append this as a literal provided it's not a space character
  403. // or the perl option regbase::mod_x is not set:
  404. if(
  405. ((this->flags()
  406. & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
  407. != regbase::mod_x)
  408. || !this->m_traits.isctype(*m_position, this->m_mask_space))
  409. this->append_literal(*m_position);
  410. ++m_position;
  411. return true;
  412. }
  413. template <class charT, class traits>
  414. bool basic_regex_parser<charT, traits>::parse_open_paren()
  415. {
  416. //
  417. // skip the '(' and error check:
  418. //
  419. if(++m_position == m_end)
  420. {
  421. fail(regex_constants::error_paren, m_position - m_base);
  422. return false;
  423. }
  424. //
  425. // begin by checking for a perl-style (?...) extension:
  426. //
  427. if(
  428. ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
  429. || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  430. )
  431. {
  432. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
  433. return parse_perl_extension();
  434. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
  435. return parse_perl_verb();
  436. }
  437. //
  438. // update our mark count, and append the required state:
  439. //
  440. unsigned markid = 0;
  441. if(0 == (this->flags() & regbase::nosubs))
  442. {
  443. markid = ++m_mark_count;
  444. if(this->flags() & regbase::save_subexpression_location)
  445. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
  446. }
  447. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  448. pb->index = markid;
  449. pb->icase = this->flags() & regbase::icase;
  450. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  451. // back up insertion point for alternations, and set new point:
  452. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  453. this->m_pdata->m_data.align();
  454. m_alt_insert_point = this->m_pdata->m_data.size();
  455. //
  456. // back up the current flags in case we have a nested (?imsx) group:
  457. //
  458. regex_constants::syntax_option_type opts = this->flags();
  459. bool old_case_change = m_has_case_change;
  460. m_has_case_change = false; // no changes to this scope as yet...
  461. //
  462. // Back up branch reset data in case we have a nested (?|...)
  463. //
  464. int mark_reset = m_mark_reset;
  465. m_mark_reset = -1;
  466. //
  467. // now recursively add more states, this will terminate when we get to a
  468. // matching ')' :
  469. //
  470. parse_all();
  471. //
  472. // Unwind pushed alternatives:
  473. //
  474. if(0 == unwind_alts(last_paren_start))
  475. return false;
  476. //
  477. // restore flags:
  478. //
  479. if(m_has_case_change)
  480. {
  481. // the case has changed in one or more of the alternatives
  482. // within the scoped (...) block: we have to add a state
  483. // to reset the case sensitivity:
  484. static_cast<re_case*>(
  485. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  486. )->icase = opts & regbase::icase;
  487. }
  488. this->flags(opts);
  489. m_has_case_change = old_case_change;
  490. //
  491. // restore branch reset:
  492. //
  493. m_mark_reset = mark_reset;
  494. //
  495. // we either have a ')' or we have run out of characters prematurely:
  496. //
  497. if(m_position == m_end)
  498. {
  499. this->fail(regex_constants::error_paren, std::distance(m_base, m_end));
  500. return false;
  501. }
  502. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  503. return false;
  504. if(markid && (this->flags() & regbase::save_subexpression_location))
  505. this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
  506. ++m_position;
  507. //
  508. // append closing parenthesis state:
  509. //
  510. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  511. pb->index = markid;
  512. pb->icase = this->flags() & regbase::icase;
  513. this->m_paren_start = last_paren_start;
  514. //
  515. // restore the alternate insertion point:
  516. //
  517. this->m_alt_insert_point = last_alt_point;
  518. return true;
  519. }
  520. template <class charT, class traits>
  521. bool basic_regex_parser<charT, traits>::parse_basic_escape()
  522. {
  523. if(++m_position == m_end)
  524. {
  525. fail(regex_constants::error_paren, m_position - m_base);
  526. return false;
  527. }
  528. bool result = true;
  529. switch(this->m_traits.escape_syntax_type(*m_position))
  530. {
  531. case regex_constants::syntax_open_mark:
  532. return parse_open_paren();
  533. case regex_constants::syntax_close_mark:
  534. return false;
  535. case regex_constants::syntax_plus:
  536. if(this->flags() & regex_constants::bk_plus_qm)
  537. {
  538. ++m_position;
  539. return parse_repeat(1);
  540. }
  541. else
  542. return parse_literal();
  543. case regex_constants::syntax_question:
  544. if(this->flags() & regex_constants::bk_plus_qm)
  545. {
  546. ++m_position;
  547. return parse_repeat(0, 1);
  548. }
  549. else
  550. return parse_literal();
  551. case regex_constants::syntax_open_brace:
  552. if(this->flags() & regbase::no_intervals)
  553. return parse_literal();
  554. ++m_position;
  555. return parse_repeat_range(true);
  556. case regex_constants::syntax_close_brace:
  557. if(this->flags() & regbase::no_intervals)
  558. return parse_literal();
  559. fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
  560. return false;
  561. case regex_constants::syntax_or:
  562. if(this->flags() & regbase::bk_vbar)
  563. return parse_alt();
  564. else
  565. result = parse_literal();
  566. break;
  567. case regex_constants::syntax_digit:
  568. return parse_backref();
  569. case regex_constants::escape_type_start_buffer:
  570. if(this->flags() & regbase::emacs_ex)
  571. {
  572. ++m_position;
  573. this->append_state(syntax_element_buffer_start);
  574. }
  575. else
  576. result = parse_literal();
  577. break;
  578. case regex_constants::escape_type_end_buffer:
  579. if(this->flags() & regbase::emacs_ex)
  580. {
  581. ++m_position;
  582. this->append_state(syntax_element_buffer_end);
  583. }
  584. else
  585. result = parse_literal();
  586. break;
  587. case regex_constants::escape_type_word_assert:
  588. if(this->flags() & regbase::emacs_ex)
  589. {
  590. ++m_position;
  591. this->append_state(syntax_element_word_boundary);
  592. }
  593. else
  594. result = parse_literal();
  595. break;
  596. case regex_constants::escape_type_not_word_assert:
  597. if(this->flags() & regbase::emacs_ex)
  598. {
  599. ++m_position;
  600. this->append_state(syntax_element_within_word);
  601. }
  602. else
  603. result = parse_literal();
  604. break;
  605. case regex_constants::escape_type_left_word:
  606. if(this->flags() & regbase::emacs_ex)
  607. {
  608. ++m_position;
  609. this->append_state(syntax_element_word_start);
  610. }
  611. else
  612. result = parse_literal();
  613. break;
  614. case regex_constants::escape_type_right_word:
  615. if(this->flags() & regbase::emacs_ex)
  616. {
  617. ++m_position;
  618. this->append_state(syntax_element_word_end);
  619. }
  620. else
  621. result = parse_literal();
  622. break;
  623. default:
  624. if(this->flags() & regbase::emacs_ex)
  625. {
  626. bool negate = true;
  627. switch(*m_position)
  628. {
  629. case 'w':
  630. negate = false;
  631. BOOST_REGEX_FALLTHROUGH;
  632. case 'W':
  633. {
  634. basic_char_set<charT, traits> char_set;
  635. if(negate)
  636. char_set.negate();
  637. char_set.add_class(this->m_word_mask);
  638. if(0 == this->append_set(char_set))
  639. {
  640. fail(regex_constants::error_ctype, m_position - m_base);
  641. return false;
  642. }
  643. ++m_position;
  644. return true;
  645. }
  646. case 's':
  647. negate = false;
  648. BOOST_REGEX_FALLTHROUGH;
  649. case 'S':
  650. return add_emacs_code(negate);
  651. case 'c':
  652. case 'C':
  653. // not supported yet:
  654. fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
  655. return false;
  656. default:
  657. break;
  658. }
  659. }
  660. result = parse_literal();
  661. break;
  662. }
  663. return result;
  664. }
  665. template <class charT, class traits>
  666. bool basic_regex_parser<charT, traits>::parse_extended_escape()
  667. {
  668. ++m_position;
  669. if(m_position == m_end)
  670. {
  671. fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
  672. return false;
  673. }
  674. bool negate = false; // in case this is a character class escape: \w \d etc
  675. switch(this->m_traits.escape_syntax_type(*m_position))
  676. {
  677. case regex_constants::escape_type_not_class:
  678. negate = true;
  679. BOOST_REGEX_FALLTHROUGH;
  680. case regex_constants::escape_type_class:
  681. {
  682. escape_type_class_jump:
  683. typedef typename traits::char_class_type m_type;
  684. m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  685. if(m != 0)
  686. {
  687. basic_char_set<charT, traits> char_set;
  688. if(negate)
  689. char_set.negate();
  690. char_set.add_class(m);
  691. if(0 == this->append_set(char_set))
  692. {
  693. fail(regex_constants::error_ctype, m_position - m_base);
  694. return false;
  695. }
  696. ++m_position;
  697. return true;
  698. }
  699. //
  700. // not a class, just a regular unknown escape:
  701. //
  702. this->append_literal(unescape_character());
  703. break;
  704. }
  705. case regex_constants::syntax_digit:
  706. return parse_backref();
  707. case regex_constants::escape_type_left_word:
  708. ++m_position;
  709. this->append_state(syntax_element_word_start);
  710. break;
  711. case regex_constants::escape_type_right_word:
  712. ++m_position;
  713. this->append_state(syntax_element_word_end);
  714. break;
  715. case regex_constants::escape_type_start_buffer:
  716. ++m_position;
  717. this->append_state(syntax_element_buffer_start);
  718. break;
  719. case regex_constants::escape_type_end_buffer:
  720. ++m_position;
  721. this->append_state(syntax_element_buffer_end);
  722. break;
  723. case regex_constants::escape_type_word_assert:
  724. ++m_position;
  725. this->append_state(syntax_element_word_boundary);
  726. break;
  727. case regex_constants::escape_type_not_word_assert:
  728. ++m_position;
  729. this->append_state(syntax_element_within_word);
  730. break;
  731. case regex_constants::escape_type_Z:
  732. ++m_position;
  733. this->append_state(syntax_element_soft_buffer_end);
  734. break;
  735. case regex_constants::escape_type_Q:
  736. return parse_QE();
  737. case regex_constants::escape_type_C:
  738. return parse_match_any();
  739. case regex_constants::escape_type_X:
  740. ++m_position;
  741. this->append_state(syntax_element_combining);
  742. break;
  743. case regex_constants::escape_type_G:
  744. ++m_position;
  745. this->append_state(syntax_element_restart_continue);
  746. break;
  747. case regex_constants::escape_type_not_property:
  748. negate = true;
  749. BOOST_REGEX_FALLTHROUGH;
  750. case regex_constants::escape_type_property:
  751. {
  752. ++m_position;
  753. char_class_type m;
  754. if(m_position == m_end)
  755. {
  756. fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
  757. return false;
  758. }
  759. // maybe have \p{ddd}
  760. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  761. {
  762. const charT* base = m_position;
  763. // skip forward until we find enclosing brace:
  764. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  765. ++m_position;
  766. if(m_position == m_end)
  767. {
  768. fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
  769. return false;
  770. }
  771. m = this->m_traits.lookup_classname(++base, m_position++);
  772. }
  773. else
  774. {
  775. m = this->m_traits.lookup_classname(m_position, m_position+1);
  776. ++m_position;
  777. }
  778. if(m != 0)
  779. {
  780. basic_char_set<charT, traits> char_set;
  781. if(negate)
  782. char_set.negate();
  783. char_set.add_class(m);
  784. if(0 == this->append_set(char_set))
  785. {
  786. fail(regex_constants::error_ctype, m_position - m_base);
  787. return false;
  788. }
  789. return true;
  790. }
  791. fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
  792. return false;
  793. }
  794. case regex_constants::escape_type_reset_start_mark:
  795. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  796. {
  797. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  798. pb->index = -5;
  799. pb->icase = this->flags() & regbase::icase;
  800. this->m_pdata->m_data.align();
  801. ++m_position;
  802. return true;
  803. }
  804. goto escape_type_class_jump;
  805. case regex_constants::escape_type_line_ending:
  806. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  807. {
  808. const charT* e = get_escape_R_string<charT>();
  809. const charT* old_position = m_position;
  810. const charT* old_end = m_end;
  811. const charT* old_base = m_base;
  812. m_position = e;
  813. m_base = e;
  814. m_end = e + traits::length(e);
  815. bool r = parse_all();
  816. m_position = ++old_position;
  817. m_end = old_end;
  818. m_base = old_base;
  819. return r;
  820. }
  821. goto escape_type_class_jump;
  822. case regex_constants::escape_type_extended_backref:
  823. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  824. {
  825. bool have_brace = false;
  826. bool negative = false;
  827. static const char incomplete_message[] = "Incomplete \\g escape found.";
  828. if(++m_position == m_end)
  829. {
  830. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  831. return false;
  832. }
  833. // maybe have \g{ddd}
  834. regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
  835. regex_constants::syntax_type syn_end = 0;
  836. if((syn == regex_constants::syntax_open_brace)
  837. || (syn == regex_constants::escape_type_left_word)
  838. || (syn == regex_constants::escape_type_end_buffer))
  839. {
  840. if(++m_position == m_end)
  841. {
  842. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  843. return false;
  844. }
  845. have_brace = true;
  846. switch(syn)
  847. {
  848. case regex_constants::syntax_open_brace:
  849. syn_end = regex_constants::syntax_close_brace;
  850. break;
  851. case regex_constants::escape_type_left_word:
  852. syn_end = regex_constants::escape_type_right_word;
  853. break;
  854. default:
  855. syn_end = regex_constants::escape_type_end_buffer;
  856. break;
  857. }
  858. }
  859. negative = (*m_position == static_cast<charT>('-'));
  860. if((negative) && (++m_position == m_end))
  861. {
  862. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  863. return false;
  864. }
  865. const charT* pc = m_position;
  866. std::intmax_t i = this->m_traits.toi(pc, m_end, 10);
  867. if((i < 0) && syn_end)
  868. {
  869. // Check for a named capture, get the leftmost one if there is more than one:
  870. const charT* base = m_position;
  871. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
  872. {
  873. ++m_position;
  874. }
  875. i = hash_value_from_capture_name(base, m_position);
  876. pc = m_position;
  877. }
  878. if(negative)
  879. i = 1 + (static_cast<std::intmax_t>(m_mark_count) - i);
  880. if(((i < hash_value_mask) && (i > 0)) || ((i >= hash_value_mask) && (this->m_pdata->get_id((int)i) > 0)))
  881. {
  882. m_position = pc;
  883. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  884. pb->index = (int)i;
  885. pb->icase = this->flags() & regbase::icase;
  886. if ((i > m_max_backref) && (i < hash_value_mask))
  887. m_max_backref = i;
  888. }
  889. else
  890. {
  891. fail(regex_constants::error_backref, m_position - m_base);
  892. return false;
  893. }
  894. m_position = pc;
  895. if(have_brace)
  896. {
  897. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
  898. {
  899. fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
  900. return false;
  901. }
  902. ++m_position;
  903. }
  904. return true;
  905. }
  906. goto escape_type_class_jump;
  907. case regex_constants::escape_type_control_v:
  908. if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  909. goto escape_type_class_jump;
  910. BOOST_REGEX_FALLTHROUGH;
  911. default:
  912. this->append_literal(unescape_character());
  913. break;
  914. }
  915. return true;
  916. }
  917. template <class charT, class traits>
  918. bool basic_regex_parser<charT, traits>::parse_match_any()
  919. {
  920. //
  921. // we have a '.' that can match any character:
  922. //
  923. ++m_position;
  924. static_cast<re_dot*>(
  925. this->append_state(syntax_element_wild, sizeof(re_dot))
  926. )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
  927. ? BOOST_REGEX_DETAIL_NS::force_not_newline
  928. : this->flags() & regbase::mod_s ?
  929. BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
  930. return true;
  931. }
  932. template <class charT, class traits>
  933. bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
  934. {
  935. bool greedy = true;
  936. bool possessive = false;
  937. std::size_t insert_point;
  938. //
  939. // when we get to here we may have a non-greedy ? mark still to come:
  940. //
  941. if((m_position != m_end)
  942. && (
  943. (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
  944. || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
  945. )
  946. )
  947. {
  948. // OK we have a perl or emacs regex, check for a '?':
  949. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  950. {
  951. // whitespace skip:
  952. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  953. ++m_position;
  954. }
  955. if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
  956. {
  957. greedy = false;
  958. ++m_position;
  959. }
  960. // for perl regexes only check for possessive ++ repeats.
  961. if((m_position != m_end)
  962. && (0 == (this->flags() & regbase::main_option_type))
  963. && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
  964. {
  965. possessive = true;
  966. ++m_position;
  967. }
  968. }
  969. if(0 == this->m_last_state)
  970. {
  971. fail(regex_constants::error_badrepeat, std::distance(m_base, m_position), "Nothing to repeat.");
  972. return false;
  973. }
  974. if(this->m_last_state->type == syntax_element_endmark)
  975. {
  976. // insert a repeat before the '(' matching the last ')':
  977. insert_point = this->m_paren_start;
  978. }
  979. else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
  980. {
  981. // the last state was a literal with more than one character, split it in two:
  982. re_literal* lit = static_cast<re_literal*>(this->m_last_state);
  983. charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
  984. lit->length -= 1;
  985. // now append new state:
  986. lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
  987. lit->length = 1;
  988. (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
  989. insert_point = this->getoffset(this->m_last_state);
  990. }
  991. else
  992. {
  993. // repeat the last state whatever it was, need to add some error checking here:
  994. switch(this->m_last_state->type)
  995. {
  996. case syntax_element_start_line:
  997. case syntax_element_end_line:
  998. case syntax_element_word_boundary:
  999. case syntax_element_within_word:
  1000. case syntax_element_word_start:
  1001. case syntax_element_word_end:
  1002. case syntax_element_buffer_start:
  1003. case syntax_element_buffer_end:
  1004. case syntax_element_alt:
  1005. case syntax_element_soft_buffer_end:
  1006. case syntax_element_restart_continue:
  1007. case syntax_element_jump:
  1008. case syntax_element_startmark:
  1009. case syntax_element_backstep:
  1010. case syntax_element_toggle_case:
  1011. // can't legally repeat any of the above:
  1012. fail(regex_constants::error_badrepeat, m_position - m_base);
  1013. return false;
  1014. default:
  1015. // do nothing...
  1016. break;
  1017. }
  1018. insert_point = this->getoffset(this->m_last_state);
  1019. }
  1020. //
  1021. // OK we now know what to repeat, so insert the repeat around it:
  1022. //
  1023. re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
  1024. rep->min = low;
  1025. rep->max = high;
  1026. rep->greedy = greedy;
  1027. rep->leading = false;
  1028. // store our repeater position for later:
  1029. std::ptrdiff_t rep_off = this->getoffset(rep);
  1030. // and append a back jump to the repeat:
  1031. re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
  1032. jmp->alt.i = rep_off - this->getoffset(jmp);
  1033. this->m_pdata->m_data.align();
  1034. // now fill in the alt jump for the repeat:
  1035. rep = static_cast<re_repeat*>(this->getaddress(rep_off));
  1036. rep->alt.i = this->m_pdata->m_data.size() - rep_off;
  1037. //
  1038. // If the repeat is possessive then bracket the repeat with a (?>...)
  1039. // independent sub-expression construct:
  1040. //
  1041. if(possessive)
  1042. {
  1043. if(m_position != m_end)
  1044. {
  1045. //
  1046. // Check for illegal following quantifier, we have to do this here, because
  1047. // the extra states we insert below circumvents our usual error checking :-(
  1048. //
  1049. bool contin = false;
  1050. do
  1051. {
  1052. if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
  1053. {
  1054. // whitespace skip:
  1055. while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1056. ++m_position;
  1057. }
  1058. if (m_position != m_end)
  1059. {
  1060. switch (this->m_traits.syntax_type(*m_position))
  1061. {
  1062. case regex_constants::syntax_star:
  1063. case regex_constants::syntax_plus:
  1064. case regex_constants::syntax_question:
  1065. case regex_constants::syntax_open_brace:
  1066. fail(regex_constants::error_badrepeat, m_position - m_base);
  1067. return false;
  1068. case regex_constants::syntax_open_mark:
  1069. // Do we have a comment? If so we need to skip it here...
  1070. if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
  1071. && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
  1072. {
  1073. while ((m_position != m_end)
  1074. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
  1075. }
  1076. contin = true;
  1077. }
  1078. else
  1079. contin = false;
  1080. break;
  1081. default:
  1082. contin = false;
  1083. }
  1084. }
  1085. else
  1086. contin = false;
  1087. } while (contin);
  1088. }
  1089. re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
  1090. pb->index = -3;
  1091. pb->icase = this->flags() & regbase::icase;
  1092. jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
  1093. this->m_pdata->m_data.align();
  1094. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  1095. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  1096. pb->index = -3;
  1097. pb->icase = this->flags() & regbase::icase;
  1098. }
  1099. return true;
  1100. }
  1101. template <class charT, class traits>
  1102. bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
  1103. {
  1104. static const char incomplete_message[] = "Missing } in quantified repetition.";
  1105. //
  1106. // parse a repeat-range:
  1107. //
  1108. std::size_t min, max;
  1109. std::intmax_t v;
  1110. // skip whitespace:
  1111. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1112. ++m_position;
  1113. if(this->m_position == this->m_end)
  1114. {
  1115. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1116. {
  1117. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1118. return false;
  1119. }
  1120. // Treat the opening '{' as a literal character, rewind to start of error:
  1121. --m_position;
  1122. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1123. return parse_literal();
  1124. }
  1125. // get min:
  1126. v = this->m_traits.toi(m_position, m_end, 10);
  1127. // skip whitespace:
  1128. if((v < 0) || (v > umax()))
  1129. {
  1130. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1131. {
  1132. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1133. return false;
  1134. }
  1135. // Treat the opening '{' as a literal character, rewind to start of error:
  1136. --m_position;
  1137. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1138. return parse_literal();
  1139. }
  1140. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1141. ++m_position;
  1142. if(this->m_position == this->m_end)
  1143. {
  1144. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1145. {
  1146. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1147. return false;
  1148. }
  1149. // Treat the opening '{' as a literal character, rewind to start of error:
  1150. --m_position;
  1151. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1152. return parse_literal();
  1153. }
  1154. min = static_cast<std::size_t>(v);
  1155. // see if we have a comma:
  1156. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
  1157. {
  1158. // move on and error check:
  1159. ++m_position;
  1160. // skip whitespace:
  1161. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1162. ++m_position;
  1163. if(this->m_position == this->m_end)
  1164. {
  1165. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1166. {
  1167. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1168. return false;
  1169. }
  1170. // Treat the opening '{' as a literal character, rewind to start of error:
  1171. --m_position;
  1172. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1173. return parse_literal();
  1174. }
  1175. // get the value if any:
  1176. v = this->m_traits.toi(m_position, m_end, 10);
  1177. max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
  1178. }
  1179. else
  1180. {
  1181. // no comma, max = min:
  1182. max = min;
  1183. }
  1184. // skip whitespace:
  1185. while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
  1186. ++m_position;
  1187. // OK now check trailing }:
  1188. if(this->m_position == this->m_end)
  1189. {
  1190. if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
  1191. {
  1192. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1193. return false;
  1194. }
  1195. // Treat the opening '{' as a literal character, rewind to start of error:
  1196. --m_position;
  1197. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1198. return parse_literal();
  1199. }
  1200. if(isbasic)
  1201. {
  1202. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
  1203. {
  1204. ++m_position;
  1205. if(this->m_position == this->m_end)
  1206. {
  1207. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1208. return false;
  1209. }
  1210. }
  1211. else
  1212. {
  1213. fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
  1214. return false;
  1215. }
  1216. }
  1217. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
  1218. ++m_position;
  1219. else
  1220. {
  1221. // Treat the opening '{' as a literal character, rewind to start of error:
  1222. --m_position;
  1223. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
  1224. return parse_literal();
  1225. }
  1226. //
  1227. // finally go and add the repeat, unless error:
  1228. //
  1229. if(min > max)
  1230. {
  1231. // Backtrack to error location:
  1232. m_position -= 2;
  1233. while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
  1234. ++m_position;
  1235. fail(regex_constants::error_badbrace, m_position - m_base);
  1236. return false;
  1237. }
  1238. return parse_repeat(min, max);
  1239. }
  1240. template <class charT, class traits>
  1241. bool basic_regex_parser<charT, traits>::parse_alt()
  1242. {
  1243. //
  1244. // error check: if there have been no previous states,
  1245. // or if the last state was a '(' then error:
  1246. //
  1247. if(
  1248. ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
  1249. &&
  1250. !(
  1251. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  1252. &&
  1253. ((this->flags() & regbase::no_empty_expressions) == 0)
  1254. )
  1255. )
  1256. {
  1257. fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
  1258. return false;
  1259. }
  1260. //
  1261. // Reset mark count if required:
  1262. //
  1263. if(m_max_mark < m_mark_count)
  1264. m_max_mark = m_mark_count;
  1265. if(m_mark_reset >= 0)
  1266. m_mark_count = m_mark_reset;
  1267. ++m_position;
  1268. //
  1269. // we need to append a trailing jump:
  1270. //
  1271. re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
  1272. std::ptrdiff_t jump_offset = this->getoffset(pj);
  1273. //
  1274. // now insert the alternative:
  1275. //
  1276. re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
  1277. jump_offset += re_alt_size;
  1278. this->m_pdata->m_data.align();
  1279. palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
  1280. //
  1281. // update m_alt_insert_point so that the next alternate gets
  1282. // inserted at the start of the second of the two we've just created:
  1283. //
  1284. this->m_alt_insert_point = this->m_pdata->m_data.size();
  1285. //
  1286. // the start of this alternative must have a case changes state
  1287. // if the current block has messed around with case changes:
  1288. //
  1289. if(m_has_case_change)
  1290. {
  1291. static_cast<re_case*>(
  1292. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  1293. )->icase = this->m_icase;
  1294. }
  1295. //
  1296. // push the alternative onto our stack, a recursive
  1297. // implementation here is easier to understand (and faster
  1298. // as it happens), but causes all kinds of stack overflow problems
  1299. // on programs with small stacks (COM+).
  1300. //
  1301. m_alt_jumps.push_back(jump_offset);
  1302. return true;
  1303. }
  1304. template <class charT, class traits>
  1305. bool basic_regex_parser<charT, traits>::parse_set()
  1306. {
  1307. static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1308. ++m_position;
  1309. if(m_position == m_end)
  1310. {
  1311. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1312. return false;
  1313. }
  1314. basic_char_set<charT, traits> char_set;
  1315. const charT* base = m_position; // where the '[' was
  1316. const charT* item_base = m_position; // where the '[' or '^' was
  1317. while(m_position != m_end)
  1318. {
  1319. switch(this->m_traits.syntax_type(*m_position))
  1320. {
  1321. case regex_constants::syntax_caret:
  1322. if(m_position == base)
  1323. {
  1324. char_set.negate();
  1325. ++m_position;
  1326. item_base = m_position;
  1327. }
  1328. else
  1329. parse_set_literal(char_set);
  1330. break;
  1331. case regex_constants::syntax_close_set:
  1332. if(m_position == item_base)
  1333. {
  1334. parse_set_literal(char_set);
  1335. break;
  1336. }
  1337. else
  1338. {
  1339. ++m_position;
  1340. if(0 == this->append_set(char_set))
  1341. {
  1342. fail(regex_constants::error_ctype, m_position - m_base);
  1343. return false;
  1344. }
  1345. }
  1346. return true;
  1347. case regex_constants::syntax_open_set:
  1348. if(parse_inner_set(char_set))
  1349. break;
  1350. return true;
  1351. case regex_constants::syntax_escape:
  1352. {
  1353. //
  1354. // look ahead and see if this is a character class shortcut
  1355. // \d \w \s etc...
  1356. //
  1357. ++m_position;
  1358. if(this->m_traits.escape_syntax_type(*m_position)
  1359. == regex_constants::escape_type_class)
  1360. {
  1361. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1362. if(m != 0)
  1363. {
  1364. char_set.add_class(m);
  1365. ++m_position;
  1366. break;
  1367. }
  1368. }
  1369. else if(this->m_traits.escape_syntax_type(*m_position)
  1370. == regex_constants::escape_type_not_class)
  1371. {
  1372. // negated character class:
  1373. char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
  1374. if(m != 0)
  1375. {
  1376. char_set.add_negated_class(m);
  1377. ++m_position;
  1378. break;
  1379. }
  1380. }
  1381. // not a character class, just a regular escape:
  1382. --m_position;
  1383. parse_set_literal(char_set);
  1384. break;
  1385. }
  1386. default:
  1387. parse_set_literal(char_set);
  1388. break;
  1389. }
  1390. }
  1391. return m_position != m_end;
  1392. }
  1393. template <class charT, class traits>
  1394. bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
  1395. {
  1396. static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
  1397. //
  1398. // we have either a character class [:name:]
  1399. // a collating element [.name.]
  1400. // or an equivalence class [=name=]
  1401. //
  1402. if(m_end == ++m_position)
  1403. {
  1404. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1405. return false;
  1406. }
  1407. switch(this->m_traits.syntax_type(*m_position))
  1408. {
  1409. case regex_constants::syntax_dot:
  1410. //
  1411. // a collating element is treated as a literal:
  1412. //
  1413. --m_position;
  1414. parse_set_literal(char_set);
  1415. return true;
  1416. case regex_constants::syntax_colon:
  1417. {
  1418. // check that character classes are actually enabled:
  1419. if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
  1420. == (regbase::basic_syntax_group | regbase::no_char_classes))
  1421. {
  1422. --m_position;
  1423. parse_set_literal(char_set);
  1424. return true;
  1425. }
  1426. // skip the ':'
  1427. if(m_end == ++m_position)
  1428. {
  1429. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1430. return false;
  1431. }
  1432. const charT* name_first = m_position;
  1433. // skip at least one character, then find the matching ':]'
  1434. if(m_end == ++m_position)
  1435. {
  1436. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1437. return false;
  1438. }
  1439. while((m_position != m_end)
  1440. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
  1441. ++m_position;
  1442. const charT* name_last = m_position;
  1443. if(m_end == m_position)
  1444. {
  1445. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1446. return false;
  1447. }
  1448. if((m_end == ++m_position)
  1449. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1450. {
  1451. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1452. return false;
  1453. }
  1454. //
  1455. // check for negated class:
  1456. //
  1457. bool negated = false;
  1458. if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
  1459. {
  1460. ++name_first;
  1461. negated = true;
  1462. }
  1463. typedef typename traits::char_class_type m_type;
  1464. m_type m = this->m_traits.lookup_classname(name_first, name_last);
  1465. if(m == 0)
  1466. {
  1467. if(char_set.empty() && (name_last - name_first == 1))
  1468. {
  1469. // maybe a special case:
  1470. ++m_position;
  1471. if( (m_position != m_end)
  1472. && (this->m_traits.syntax_type(*m_position)
  1473. == regex_constants::syntax_close_set))
  1474. {
  1475. if(this->m_traits.escape_syntax_type(*name_first)
  1476. == regex_constants::escape_type_left_word)
  1477. {
  1478. ++m_position;
  1479. this->append_state(syntax_element_word_start);
  1480. return false;
  1481. }
  1482. if(this->m_traits.escape_syntax_type(*name_first)
  1483. == regex_constants::escape_type_right_word)
  1484. {
  1485. ++m_position;
  1486. this->append_state(syntax_element_word_end);
  1487. return false;
  1488. }
  1489. }
  1490. }
  1491. fail(regex_constants::error_ctype, name_first - m_base);
  1492. return false;
  1493. }
  1494. if(!negated)
  1495. char_set.add_class(m);
  1496. else
  1497. char_set.add_negated_class(m);
  1498. ++m_position;
  1499. break;
  1500. }
  1501. case regex_constants::syntax_equal:
  1502. {
  1503. // skip the '='
  1504. if(m_end == ++m_position)
  1505. {
  1506. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1507. return false;
  1508. }
  1509. const charT* name_first = m_position;
  1510. // skip at least one character, then find the matching '=]'
  1511. if(m_end == ++m_position)
  1512. {
  1513. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1514. return false;
  1515. }
  1516. while((m_position != m_end)
  1517. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
  1518. ++m_position;
  1519. const charT* name_last = m_position;
  1520. if(m_end == m_position)
  1521. {
  1522. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1523. return false;
  1524. }
  1525. if((m_end == ++m_position)
  1526. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1527. {
  1528. fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
  1529. return false;
  1530. }
  1531. string_type m = this->m_traits.lookup_collatename(name_first, name_last);
  1532. if(m.empty() || (m.size() > 2))
  1533. {
  1534. fail(regex_constants::error_collate, name_first - m_base);
  1535. return false;
  1536. }
  1537. digraph<charT> d;
  1538. d.first = m[0];
  1539. if(m.size() > 1)
  1540. d.second = m[1];
  1541. else
  1542. d.second = 0;
  1543. char_set.add_equivalent(d);
  1544. ++m_position;
  1545. break;
  1546. }
  1547. default:
  1548. --m_position;
  1549. parse_set_literal(char_set);
  1550. break;
  1551. }
  1552. return true;
  1553. }
  1554. template <class charT, class traits>
  1555. void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
  1556. {
  1557. digraph<charT> start_range(get_next_set_literal(char_set));
  1558. if(m_end == m_position)
  1559. {
  1560. fail(regex_constants::error_brack, m_position - m_base);
  1561. return;
  1562. }
  1563. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1564. {
  1565. // we have a range:
  1566. if(m_end == ++m_position)
  1567. {
  1568. fail(regex_constants::error_brack, m_position - m_base);
  1569. return;
  1570. }
  1571. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
  1572. {
  1573. digraph<charT> end_range = get_next_set_literal(char_set);
  1574. char_set.add_range(start_range, end_range);
  1575. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
  1576. {
  1577. if(m_end == ++m_position)
  1578. {
  1579. fail(regex_constants::error_brack, m_position - m_base);
  1580. return;
  1581. }
  1582. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
  1583. {
  1584. // trailing - :
  1585. --m_position;
  1586. return;
  1587. }
  1588. fail(regex_constants::error_range, m_position - m_base);
  1589. return;
  1590. }
  1591. return;
  1592. }
  1593. --m_position;
  1594. }
  1595. char_set.add_single(start_range);
  1596. }
  1597. template <class charT, class traits>
  1598. digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
  1599. {
  1600. digraph<charT> result;
  1601. switch(this->m_traits.syntax_type(*m_position))
  1602. {
  1603. case regex_constants::syntax_dash:
  1604. if(!char_set.empty())
  1605. {
  1606. // see if we are at the end of the set:
  1607. if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1608. {
  1609. fail(regex_constants::error_range, m_position - m_base);
  1610. return result;
  1611. }
  1612. --m_position;
  1613. }
  1614. result.first = *m_position++;
  1615. return result;
  1616. case regex_constants::syntax_escape:
  1617. // check to see if escapes are supported first:
  1618. if(this->flags() & regex_constants::no_escape_in_lists)
  1619. {
  1620. result = *m_position++;
  1621. break;
  1622. }
  1623. ++m_position;
  1624. result = unescape_character();
  1625. break;
  1626. case regex_constants::syntax_open_set:
  1627. {
  1628. if(m_end == ++m_position)
  1629. {
  1630. fail(regex_constants::error_collate, m_position - m_base);
  1631. return result;
  1632. }
  1633. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
  1634. {
  1635. --m_position;
  1636. result.first = *m_position;
  1637. ++m_position;
  1638. return result;
  1639. }
  1640. if(m_end == ++m_position)
  1641. {
  1642. fail(regex_constants::error_collate, m_position - m_base);
  1643. return result;
  1644. }
  1645. const charT* name_first = m_position;
  1646. // skip at least one character, then find the matching ':]'
  1647. if(m_end == ++m_position)
  1648. {
  1649. fail(regex_constants::error_collate, name_first - m_base);
  1650. return result;
  1651. }
  1652. while((m_position != m_end)
  1653. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
  1654. ++m_position;
  1655. const charT* name_last = m_position;
  1656. if(m_end == m_position)
  1657. {
  1658. fail(regex_constants::error_collate, name_first - m_base);
  1659. return result;
  1660. }
  1661. if((m_end == ++m_position)
  1662. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
  1663. {
  1664. fail(regex_constants::error_collate, name_first - m_base);
  1665. return result;
  1666. }
  1667. ++m_position;
  1668. string_type s = this->m_traits.lookup_collatename(name_first, name_last);
  1669. if(s.empty() || (s.size() > 2))
  1670. {
  1671. fail(regex_constants::error_collate, name_first - m_base);
  1672. return result;
  1673. }
  1674. result.first = s[0];
  1675. if(s.size() > 1)
  1676. result.second = s[1];
  1677. else
  1678. result.second = 0;
  1679. return result;
  1680. }
  1681. default:
  1682. result = *m_position++;
  1683. }
  1684. return result;
  1685. }
  1686. //
  1687. // does a value fit in the specified charT type?
  1688. //
  1689. template <class charT>
  1690. bool valid_value(charT, std::intmax_t v, const std::integral_constant<bool, true>&)
  1691. {
  1692. return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
  1693. }
  1694. template <class charT>
  1695. bool valid_value(charT, std::intmax_t, const std::integral_constant<bool, false>&)
  1696. {
  1697. return true; // v will alsways fit in a charT
  1698. }
  1699. template <class charT>
  1700. bool valid_value(charT c, std::intmax_t v)
  1701. {
  1702. return valid_value(c, v, std::integral_constant<bool, (sizeof(charT) < sizeof(std::intmax_t))>());
  1703. }
  1704. template <class charT, class traits>
  1705. charT basic_regex_parser<charT, traits>::unescape_character()
  1706. {
  1707. #ifdef BOOST_REGEX_MSVC
  1708. #pragma warning(push)
  1709. #pragma warning(disable:4127)
  1710. #endif
  1711. charT result(0);
  1712. if(m_position == m_end)
  1713. {
  1714. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
  1715. return false;
  1716. }
  1717. switch(this->m_traits.escape_syntax_type(*m_position))
  1718. {
  1719. case regex_constants::escape_type_control_a:
  1720. result = charT('\a');
  1721. break;
  1722. case regex_constants::escape_type_e:
  1723. result = charT(27);
  1724. break;
  1725. case regex_constants::escape_type_control_f:
  1726. result = charT('\f');
  1727. break;
  1728. case regex_constants::escape_type_control_n:
  1729. result = charT('\n');
  1730. break;
  1731. case regex_constants::escape_type_control_r:
  1732. result = charT('\r');
  1733. break;
  1734. case regex_constants::escape_type_control_t:
  1735. result = charT('\t');
  1736. break;
  1737. case regex_constants::escape_type_control_v:
  1738. result = charT('\v');
  1739. break;
  1740. case regex_constants::escape_type_word_assert:
  1741. result = charT('\b');
  1742. break;
  1743. case regex_constants::escape_type_ascii_control:
  1744. ++m_position;
  1745. if(m_position == m_end)
  1746. {
  1747. // Rewind to start of escape:
  1748. --m_position;
  1749. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1750. fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
  1751. return result;
  1752. }
  1753. result = static_cast<charT>(*m_position % 32);
  1754. break;
  1755. case regex_constants::escape_type_hex:
  1756. ++m_position;
  1757. if(m_position == m_end)
  1758. {
  1759. // Rewind to start of escape:
  1760. --m_position;
  1761. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1762. fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
  1763. return result;
  1764. }
  1765. // maybe have \x{ddd}
  1766. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1767. {
  1768. ++m_position;
  1769. if(m_position == m_end)
  1770. {
  1771. // Rewind to start of escape:
  1772. --m_position;
  1773. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1774. fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
  1775. return result;
  1776. }
  1777. std::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
  1778. if((m_position == m_end)
  1779. || (i < 0)
  1780. || ((std::numeric_limits<charT>::is_specialized) && (i > (std::intmax_t)(std::numeric_limits<charT>::max)()))
  1781. || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1782. {
  1783. // Rewind to start of escape:
  1784. --m_position;
  1785. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1786. fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
  1787. return result;
  1788. }
  1789. ++m_position;
  1790. result = charT(i);
  1791. }
  1792. else
  1793. {
  1794. std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
  1795. std::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
  1796. if((i < 0)
  1797. || !valid_value(charT(0), i))
  1798. {
  1799. // Rewind to start of escape:
  1800. --m_position;
  1801. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1802. fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
  1803. return result;
  1804. }
  1805. result = charT(i);
  1806. }
  1807. return result;
  1808. case regex_constants::syntax_digit:
  1809. {
  1810. // an octal escape sequence, the first character must be a zero
  1811. // followed by up to 3 octal digits:
  1812. std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
  1813. const charT* bp = m_position;
  1814. std::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
  1815. if(val != 0)
  1816. {
  1817. // Rewind to start of escape:
  1818. --m_position;
  1819. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1820. // Oops not an octal escape after all:
  1821. fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
  1822. return result;
  1823. }
  1824. val = this->m_traits.toi(m_position, m_position + len, 8);
  1825. if((val < 0) || (val > (std::intmax_t)(std::numeric_limits<charT>::max)()))
  1826. {
  1827. // Rewind to start of escape:
  1828. --m_position;
  1829. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1830. fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
  1831. return result;
  1832. }
  1833. return static_cast<charT>(val);
  1834. }
  1835. case regex_constants::escape_type_named_char:
  1836. {
  1837. ++m_position;
  1838. if(m_position == m_end)
  1839. {
  1840. // Rewind to start of escape:
  1841. --m_position;
  1842. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1843. fail(regex_constants::error_escape, m_position - m_base);
  1844. return false;
  1845. }
  1846. // maybe have \N{name}
  1847. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
  1848. {
  1849. const charT* base = m_position;
  1850. // skip forward until we find enclosing brace:
  1851. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
  1852. ++m_position;
  1853. if(m_position == m_end)
  1854. {
  1855. // Rewind to start of escape:
  1856. --m_position;
  1857. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1858. fail(regex_constants::error_escape, m_position - m_base);
  1859. return false;
  1860. }
  1861. string_type s = this->m_traits.lookup_collatename(++base, m_position++);
  1862. if(s.empty())
  1863. {
  1864. // Rewind to start of escape:
  1865. --m_position;
  1866. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1867. fail(regex_constants::error_collate, m_position - m_base);
  1868. return false;
  1869. }
  1870. if(s.size() == 1)
  1871. {
  1872. return s[0];
  1873. }
  1874. }
  1875. // fall through is a failure:
  1876. // Rewind to start of escape:
  1877. --m_position;
  1878. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1879. fail(regex_constants::error_escape, m_position - m_base);
  1880. return false;
  1881. }
  1882. default:
  1883. result = *m_position;
  1884. break;
  1885. }
  1886. ++m_position;
  1887. return result;
  1888. #ifdef BOOST_REGEX_MSVC
  1889. #pragma warning(pop)
  1890. #endif
  1891. }
  1892. template <class charT, class traits>
  1893. bool basic_regex_parser<charT, traits>::parse_backref()
  1894. {
  1895. BOOST_REGEX_ASSERT(m_position != m_end);
  1896. const charT* pc = m_position;
  1897. std::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
  1898. if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
  1899. {
  1900. // not a backref at all but an octal escape sequence:
  1901. charT c = unescape_character();
  1902. this->append_literal(c);
  1903. }
  1904. else if((i > 0))
  1905. {
  1906. m_position = pc;
  1907. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
  1908. pb->index = (int)i;
  1909. pb->icase = this->flags() & regbase::icase;
  1910. if(i > m_max_backref)
  1911. m_max_backref = i;
  1912. }
  1913. else
  1914. {
  1915. // Rewind to start of escape:
  1916. --m_position;
  1917. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  1918. fail(regex_constants::error_backref, m_position - m_base);
  1919. return false;
  1920. }
  1921. return true;
  1922. }
  1923. template <class charT, class traits>
  1924. bool basic_regex_parser<charT, traits>::parse_QE()
  1925. {
  1926. #ifdef BOOST_REGEX_MSVC
  1927. #pragma warning(push)
  1928. #pragma warning(disable:4127)
  1929. #endif
  1930. //
  1931. // parse a \Q...\E sequence:
  1932. //
  1933. ++m_position; // skip the Q
  1934. const charT* start = m_position;
  1935. const charT* end;
  1936. do
  1937. {
  1938. while((m_position != m_end)
  1939. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
  1940. ++m_position;
  1941. if(m_position == m_end)
  1942. {
  1943. // a \Q...\E sequence may terminate with the end of the expression:
  1944. end = m_position;
  1945. break;
  1946. }
  1947. if(++m_position == m_end) // skip the escape
  1948. {
  1949. fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
  1950. return false;
  1951. }
  1952. // check to see if it's a \E:
  1953. if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
  1954. {
  1955. ++m_position;
  1956. end = m_position - 2;
  1957. break;
  1958. }
  1959. // otherwise go round again:
  1960. }while(true);
  1961. //
  1962. // now add all the character between the two escapes as literals:
  1963. //
  1964. while(start != end)
  1965. {
  1966. this->append_literal(*start);
  1967. ++start;
  1968. }
  1969. return true;
  1970. #ifdef BOOST_REGEX_MSVC
  1971. #pragma warning(pop)
  1972. #endif
  1973. }
  1974. template <class charT, class traits>
  1975. bool basic_regex_parser<charT, traits>::parse_perl_extension()
  1976. {
  1977. if(++m_position == m_end)
  1978. {
  1979. // Rewind to start of (? sequence:
  1980. --m_position;
  1981. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  1982. fail(regex_constants::error_perl_extension, m_position - m_base);
  1983. return false;
  1984. }
  1985. //
  1986. // treat comments as a special case, as these
  1987. // are the only ones that don't start with a leading
  1988. // startmark state:
  1989. //
  1990. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
  1991. {
  1992. while((m_position != m_end)
  1993. && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
  1994. {}
  1995. return true;
  1996. }
  1997. //
  1998. // backup some state, and prepare the way:
  1999. //
  2000. int markid = 0;
  2001. std::ptrdiff_t jump_offset = 0;
  2002. re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
  2003. pb->icase = this->flags() & regbase::icase;
  2004. std::ptrdiff_t last_paren_start = this->getoffset(pb);
  2005. // back up insertion point for alternations, and set new point:
  2006. std::ptrdiff_t last_alt_point = m_alt_insert_point;
  2007. this->m_pdata->m_data.align();
  2008. m_alt_insert_point = this->m_pdata->m_data.size();
  2009. std::ptrdiff_t expected_alt_point = m_alt_insert_point;
  2010. bool restore_flags = true;
  2011. regex_constants::syntax_option_type old_flags = this->flags();
  2012. bool old_case_change = m_has_case_change;
  2013. m_has_case_change = false;
  2014. charT name_delim;
  2015. int mark_reset = m_mark_reset;
  2016. int max_mark = m_max_mark;
  2017. m_mark_reset = -1;
  2018. m_max_mark = m_mark_count;
  2019. std::intmax_t v;
  2020. //
  2021. // select the actual extension used:
  2022. //
  2023. switch(this->m_traits.syntax_type(*m_position))
  2024. {
  2025. case regex_constants::syntax_or:
  2026. m_mark_reset = m_mark_count;
  2027. BOOST_REGEX_FALLTHROUGH;
  2028. case regex_constants::syntax_colon:
  2029. //
  2030. // a non-capturing mark:
  2031. //
  2032. pb->index = markid = 0;
  2033. ++m_position;
  2034. break;
  2035. case regex_constants::syntax_digit:
  2036. {
  2037. //
  2038. // a recursive subexpression:
  2039. //
  2040. v = this->m_traits.toi(m_position, m_end, 10);
  2041. if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2042. {
  2043. // Rewind to start of (? sequence:
  2044. --m_position;
  2045. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2046. fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
  2047. return false;
  2048. }
  2049. insert_recursion:
  2050. pb->index = markid = 0;
  2051. re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
  2052. pr->alt.i = (std::ptrdiff_t)v;
  2053. pr->state_id = 0;
  2054. static_cast<re_case*>(
  2055. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2056. )->icase = this->flags() & regbase::icase;
  2057. break;
  2058. }
  2059. case regex_constants::syntax_plus:
  2060. //
  2061. // A forward-relative recursive subexpression:
  2062. //
  2063. ++m_position;
  2064. v = this->m_traits.toi(m_position, m_end, 10);
  2065. if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2066. {
  2067. // Rewind to start of (? sequence:
  2068. --m_position;
  2069. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2070. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2071. return false;
  2072. }
  2073. if ((std::numeric_limits<std::intmax_t>::max)() - m_mark_count < v)
  2074. {
  2075. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2076. return false;
  2077. }
  2078. v += m_mark_count;
  2079. goto insert_recursion;
  2080. case regex_constants::syntax_dash:
  2081. //
  2082. // Possibly a backward-relative recursive subexpression:
  2083. //
  2084. ++m_position;
  2085. v = this->m_traits.toi(m_position, m_end, 10);
  2086. if(v <= 0)
  2087. {
  2088. --m_position;
  2089. // Oops not a relative recursion at all, but a (?-imsx) group:
  2090. goto option_group_jump;
  2091. }
  2092. v = static_cast<std::intmax_t>(m_mark_count) + 1 - v;
  2093. if(v <= 0)
  2094. {
  2095. // Rewind to start of (? sequence:
  2096. --m_position;
  2097. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2098. fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
  2099. return false;
  2100. }
  2101. goto insert_recursion;
  2102. case regex_constants::syntax_equal:
  2103. pb->index = markid = -1;
  2104. ++m_position;
  2105. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2106. this->m_pdata->m_data.align();
  2107. m_alt_insert_point = this->m_pdata->m_data.size();
  2108. break;
  2109. case regex_constants::syntax_not:
  2110. pb->index = markid = -2;
  2111. ++m_position;
  2112. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2113. this->m_pdata->m_data.align();
  2114. m_alt_insert_point = this->m_pdata->m_data.size();
  2115. break;
  2116. case regex_constants::escape_type_left_word:
  2117. {
  2118. // a lookbehind assertion:
  2119. if(++m_position == m_end)
  2120. {
  2121. // Rewind to start of (? sequence:
  2122. --m_position;
  2123. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2124. fail(regex_constants::error_perl_extension, m_position - m_base);
  2125. return false;
  2126. }
  2127. regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
  2128. if(t == regex_constants::syntax_not)
  2129. pb->index = markid = -2;
  2130. else if(t == regex_constants::syntax_equal)
  2131. pb->index = markid = -1;
  2132. else
  2133. {
  2134. // Probably a named capture which also starts (?< :
  2135. name_delim = '>';
  2136. --m_position;
  2137. goto named_capture_jump;
  2138. }
  2139. ++m_position;
  2140. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2141. this->append_state(syntax_element_backstep, sizeof(re_brace));
  2142. this->m_pdata->m_data.align();
  2143. m_alt_insert_point = this->m_pdata->m_data.size();
  2144. break;
  2145. }
  2146. case regex_constants::escape_type_right_word:
  2147. //
  2148. // an independent sub-expression:
  2149. //
  2150. pb->index = markid = -3;
  2151. ++m_position;
  2152. jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
  2153. this->m_pdata->m_data.align();
  2154. m_alt_insert_point = this->m_pdata->m_data.size();
  2155. break;
  2156. case regex_constants::syntax_open_mark:
  2157. {
  2158. // a conditional expression:
  2159. pb->index = markid = -4;
  2160. if(++m_position == m_end)
  2161. {
  2162. // Rewind to start of (? sequence:
  2163. --m_position;
  2164. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2165. fail(regex_constants::error_perl_extension, m_position - m_base);
  2166. return false;
  2167. }
  2168. v = this->m_traits.toi(m_position, m_end, 10);
  2169. if(m_position == m_end)
  2170. {
  2171. // Rewind to start of (? sequence:
  2172. --m_position;
  2173. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2174. fail(regex_constants::error_perl_extension, m_position - m_base);
  2175. return false;
  2176. }
  2177. if(*m_position == charT('R'))
  2178. {
  2179. if(++m_position == m_end)
  2180. {
  2181. // Rewind to start of (? sequence:
  2182. --m_position;
  2183. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2184. fail(regex_constants::error_perl_extension, m_position - m_base);
  2185. return false;
  2186. }
  2187. if(*m_position == charT('&'))
  2188. {
  2189. const charT* base = ++m_position;
  2190. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2191. ++m_position;
  2192. if(m_position == m_end)
  2193. {
  2194. // Rewind to start of (? sequence:
  2195. --m_position;
  2196. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2197. fail(regex_constants::error_perl_extension, m_position - m_base);
  2198. return false;
  2199. }
  2200. v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
  2201. }
  2202. else
  2203. {
  2204. v = -this->m_traits.toi(m_position, m_end, 10);
  2205. }
  2206. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2207. br->index = v < 0 ? (int)(v - 1) : 0;
  2208. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2209. {
  2210. // Rewind to start of (? sequence:
  2211. --m_position;
  2212. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2213. fail(regex_constants::error_perl_extension, m_position - m_base);
  2214. return false;
  2215. }
  2216. if(++m_position == m_end)
  2217. {
  2218. // Rewind to start of (? sequence:
  2219. --m_position;
  2220. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2221. fail(regex_constants::error_perl_extension, m_position - m_base);
  2222. return false;
  2223. }
  2224. }
  2225. else if((*m_position == charT('\'')) || (*m_position == charT('<')))
  2226. {
  2227. const charT* base = ++m_position;
  2228. while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
  2229. ++m_position;
  2230. if(m_position == m_end)
  2231. {
  2232. // Rewind to start of (? sequence:
  2233. --m_position;
  2234. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2235. fail(regex_constants::error_perl_extension, m_position - m_base);
  2236. return false;
  2237. }
  2238. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2239. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2240. br->index = (int)v;
  2241. if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
  2242. {
  2243. // Rewind to start of (? sequence:
  2244. --m_position;
  2245. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2246. fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
  2247. return false;
  2248. }
  2249. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2250. {
  2251. // Rewind to start of (? sequence:
  2252. --m_position;
  2253. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2254. fail(regex_constants::error_perl_extension, m_position - m_base);
  2255. return false;
  2256. }
  2257. if(++m_position == m_end)
  2258. {
  2259. // Rewind to start of (? sequence:
  2260. --m_position;
  2261. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2262. fail(regex_constants::error_perl_extension, m_position - m_base);
  2263. return false;
  2264. }
  2265. }
  2266. else if(*m_position == charT('D'))
  2267. {
  2268. const char* def = "DEFINE";
  2269. while(*def && (m_position != m_end) && (*m_position == charT(*def)))
  2270. ++m_position, ++def;
  2271. if((m_position == m_end) || *def)
  2272. {
  2273. // Rewind to start of (? sequence:
  2274. --m_position;
  2275. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2276. fail(regex_constants::error_perl_extension, m_position - m_base);
  2277. return false;
  2278. }
  2279. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2280. br->index = 9999; // special magic value!
  2281. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2282. {
  2283. // Rewind to start of (? sequence:
  2284. --m_position;
  2285. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2286. fail(regex_constants::error_perl_extension, m_position - m_base);
  2287. return false;
  2288. }
  2289. if(++m_position == m_end)
  2290. {
  2291. // Rewind to start of (? sequence:
  2292. --m_position;
  2293. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2294. fail(regex_constants::error_perl_extension, m_position - m_base);
  2295. return false;
  2296. }
  2297. }
  2298. else if(v > 0)
  2299. {
  2300. re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
  2301. br->index = (int)v;
  2302. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2303. {
  2304. // Rewind to start of (? sequence:
  2305. --m_position;
  2306. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2307. fail(regex_constants::error_perl_extension, m_position - m_base);
  2308. return false;
  2309. }
  2310. if(++m_position == m_end)
  2311. {
  2312. // Rewind to start of (? sequence:
  2313. --m_position;
  2314. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2315. fail(regex_constants::error_perl_extension, m_position - m_base);
  2316. return false;
  2317. }
  2318. }
  2319. else
  2320. {
  2321. // verify that we have a lookahead or lookbehind assert:
  2322. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
  2323. {
  2324. // Rewind to start of (? sequence:
  2325. --m_position;
  2326. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2327. fail(regex_constants::error_perl_extension, m_position - m_base);
  2328. return false;
  2329. }
  2330. if(++m_position == m_end)
  2331. {
  2332. // Rewind to start of (? sequence:
  2333. --m_position;
  2334. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2335. fail(regex_constants::error_perl_extension, m_position - m_base);
  2336. return false;
  2337. }
  2338. if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
  2339. {
  2340. if(++m_position == m_end)
  2341. {
  2342. // Rewind to start of (? sequence:
  2343. --m_position;
  2344. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2345. fail(regex_constants::error_perl_extension, m_position - m_base);
  2346. return false;
  2347. }
  2348. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2349. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2350. {
  2351. // Rewind to start of (? sequence:
  2352. --m_position;
  2353. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2354. fail(regex_constants::error_perl_extension, m_position - m_base);
  2355. return false;
  2356. }
  2357. m_position -= 3;
  2358. }
  2359. else
  2360. {
  2361. if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
  2362. && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
  2363. {
  2364. // Rewind to start of (? sequence:
  2365. --m_position;
  2366. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2367. fail(regex_constants::error_perl_extension, m_position - m_base);
  2368. return false;
  2369. }
  2370. m_position -= 2;
  2371. }
  2372. }
  2373. break;
  2374. }
  2375. case regex_constants::syntax_close_mark:
  2376. // Rewind to start of (? sequence:
  2377. --m_position;
  2378. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2379. fail(regex_constants::error_perl_extension, m_position - m_base);
  2380. return false;
  2381. case regex_constants::escape_type_end_buffer:
  2382. {
  2383. name_delim = *m_position;
  2384. named_capture_jump:
  2385. markid = 0;
  2386. if(0 == (this->flags() & regbase::nosubs))
  2387. {
  2388. markid = ++m_mark_count;
  2389. if(this->flags() & regbase::save_subexpression_location)
  2390. this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
  2391. }
  2392. pb->index = markid;
  2393. const charT* base = ++m_position;
  2394. if(m_position == m_end)
  2395. {
  2396. // Rewind to start of (? sequence:
  2397. --m_position;
  2398. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2399. fail(regex_constants::error_perl_extension, m_position - m_base);
  2400. return false;
  2401. }
  2402. while((m_position != m_end) && (*m_position != name_delim))
  2403. ++m_position;
  2404. if(m_position == m_end)
  2405. {
  2406. // Rewind to start of (? sequence:
  2407. --m_position;
  2408. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2409. fail(regex_constants::error_perl_extension, m_position - m_base);
  2410. return false;
  2411. }
  2412. this->m_pdata->set_name(base, m_position, markid);
  2413. ++m_position;
  2414. break;
  2415. }
  2416. default:
  2417. if(*m_position == charT('R'))
  2418. {
  2419. ++m_position;
  2420. v = 0;
  2421. if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
  2422. {
  2423. // Rewind to start of (? sequence:
  2424. --m_position;
  2425. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2426. fail(regex_constants::error_perl_extension, m_position - m_base);
  2427. return false;
  2428. }
  2429. goto insert_recursion;
  2430. }
  2431. if(*m_position == charT('&'))
  2432. {
  2433. ++m_position;
  2434. const charT* base = m_position;
  2435. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2436. ++m_position;
  2437. if(m_position == m_end)
  2438. {
  2439. // Rewind to start of (? sequence:
  2440. --m_position;
  2441. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2442. fail(regex_constants::error_perl_extension, m_position - m_base);
  2443. return false;
  2444. }
  2445. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2446. goto insert_recursion;
  2447. }
  2448. if(*m_position == charT('P'))
  2449. {
  2450. ++m_position;
  2451. if(m_position == m_end)
  2452. {
  2453. // Rewind to start of (? sequence:
  2454. --m_position;
  2455. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2456. fail(regex_constants::error_perl_extension, m_position - m_base);
  2457. return false;
  2458. }
  2459. if(*m_position == charT('>'))
  2460. {
  2461. ++m_position;
  2462. const charT* base = m_position;
  2463. while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2464. ++m_position;
  2465. if(m_position == m_end)
  2466. {
  2467. // Rewind to start of (? sequence:
  2468. --m_position;
  2469. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2470. fail(regex_constants::error_perl_extension, m_position - m_base);
  2471. return false;
  2472. }
  2473. v = static_cast<int>(hash_value_from_capture_name(base, m_position));
  2474. goto insert_recursion;
  2475. }
  2476. }
  2477. //
  2478. // lets assume that we have a (?imsx) group and try and parse it:
  2479. //
  2480. option_group_jump:
  2481. regex_constants::syntax_option_type opts = parse_options();
  2482. if(m_position == m_end)
  2483. {
  2484. // Rewind to start of (? sequence:
  2485. --m_position;
  2486. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2487. fail(regex_constants::error_perl_extension, m_position - m_base);
  2488. return false;
  2489. }
  2490. // make a note of whether we have a case change:
  2491. m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
  2492. pb->index = markid = 0;
  2493. if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
  2494. {
  2495. // update flags and carry on as normal:
  2496. this->flags(opts);
  2497. restore_flags = false;
  2498. old_case_change |= m_has_case_change; // defer end of scope by one ')'
  2499. }
  2500. else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
  2501. {
  2502. // update flags and carry on until the matching ')' is found:
  2503. this->flags(opts);
  2504. ++m_position;
  2505. }
  2506. else
  2507. {
  2508. // Rewind to start of (? sequence:
  2509. --m_position;
  2510. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2511. fail(regex_constants::error_perl_extension, m_position - m_base);
  2512. return false;
  2513. }
  2514. // finally append a case change state if we need it:
  2515. if(m_has_case_change)
  2516. {
  2517. static_cast<re_case*>(
  2518. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2519. )->icase = opts & regbase::icase;
  2520. }
  2521. }
  2522. //
  2523. // now recursively add more states, this will terminate when we get to a
  2524. // matching ')' :
  2525. //
  2526. parse_all();
  2527. //
  2528. // Unwind alternatives:
  2529. //
  2530. if(0 == unwind_alts(last_paren_start))
  2531. {
  2532. // Rewind to start of (? sequence:
  2533. --m_position;
  2534. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2535. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
  2536. return false;
  2537. }
  2538. //
  2539. // we either have a ')' or we have run out of characters prematurely:
  2540. //
  2541. if(m_position == m_end)
  2542. {
  2543. // Rewind to start of (? sequence:
  2544. --m_position;
  2545. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2546. this->fail(regex_constants::error_paren, std::distance(m_base, m_end));
  2547. return false;
  2548. }
  2549. BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
  2550. ++m_position;
  2551. //
  2552. // restore the flags:
  2553. //
  2554. if(restore_flags)
  2555. {
  2556. // append a case change state if we need it:
  2557. if(m_has_case_change)
  2558. {
  2559. static_cast<re_case*>(
  2560. this->append_state(syntax_element_toggle_case, sizeof(re_case))
  2561. )->icase = old_flags & regbase::icase;
  2562. }
  2563. this->flags(old_flags);
  2564. }
  2565. //
  2566. // set up the jump pointer if we have one:
  2567. //
  2568. if(jump_offset)
  2569. {
  2570. this->m_pdata->m_data.align();
  2571. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  2572. jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
  2573. if((this->m_last_state == jmp) && (markid != -2))
  2574. {
  2575. // Oops... we didn't have anything inside the assertion.
  2576. // Note we don't get here for negated forward lookahead as (?!)
  2577. // does have some uses.
  2578. // Rewind to start of (? sequence:
  2579. --m_position;
  2580. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2581. fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
  2582. return false;
  2583. }
  2584. }
  2585. //
  2586. // verify that if this is conditional expression, that we do have
  2587. // an alternative, if not add one:
  2588. //
  2589. if(markid == -4)
  2590. {
  2591. re_syntax_base* b = this->getaddress(expected_alt_point);
  2592. // Make sure we have exactly one alternative following this state:
  2593. if(b->type != syntax_element_alt)
  2594. {
  2595. re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
  2596. alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
  2597. }
  2598. else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
  2599. {
  2600. // Can't have seen more than one alternative:
  2601. // Rewind to start of (? sequence:
  2602. --m_position;
  2603. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2604. fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
  2605. return false;
  2606. }
  2607. else
  2608. {
  2609. // We must *not* have seen an alternative inside a (DEFINE) block:
  2610. b = this->getaddress(b->next.i, b);
  2611. if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
  2612. {
  2613. // Rewind to start of (? sequence:
  2614. --m_position;
  2615. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2616. fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
  2617. return false;
  2618. }
  2619. }
  2620. // check for invalid repetition of next state:
  2621. b = this->getaddress(expected_alt_point);
  2622. b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
  2623. if((b->type != syntax_element_assert_backref)
  2624. && (b->type != syntax_element_startmark))
  2625. {
  2626. // Rewind to start of (? sequence:
  2627. --m_position;
  2628. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2629. fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
  2630. return false;
  2631. }
  2632. }
  2633. //
  2634. // append closing parenthesis state:
  2635. //
  2636. pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
  2637. pb->index = markid;
  2638. pb->icase = this->flags() & regbase::icase;
  2639. this->m_paren_start = last_paren_start;
  2640. //
  2641. // restore the alternate insertion point:
  2642. //
  2643. this->m_alt_insert_point = last_alt_point;
  2644. //
  2645. // and the case change data:
  2646. //
  2647. m_has_case_change = old_case_change;
  2648. //
  2649. // And the mark_reset data:
  2650. //
  2651. if(m_max_mark > m_mark_count)
  2652. {
  2653. m_mark_count = m_max_mark;
  2654. }
  2655. m_mark_reset = mark_reset;
  2656. m_max_mark = max_mark;
  2657. if(markid > 0)
  2658. {
  2659. if(this->flags() & regbase::save_subexpression_location)
  2660. this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
  2661. }
  2662. return true;
  2663. }
  2664. template <class charT, class traits>
  2665. bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
  2666. {
  2667. while(*verb)
  2668. {
  2669. if(static_cast<charT>(*verb) != *m_position)
  2670. {
  2671. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2672. fail(regex_constants::error_perl_extension, m_position - m_base);
  2673. return false;
  2674. }
  2675. if(++m_position == m_end)
  2676. {
  2677. --m_position;
  2678. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2679. fail(regex_constants::error_perl_extension, m_position - m_base);
  2680. return false;
  2681. }
  2682. ++verb;
  2683. }
  2684. return true;
  2685. }
  2686. #ifdef BOOST_REGEX_MSVC
  2687. # pragma warning(push)
  2688. #if BOOST_REGEX_MSVC >= 1800
  2689. #pragma warning(disable:26812)
  2690. #endif
  2691. #endif
  2692. template <class charT, class traits>
  2693. bool basic_regex_parser<charT, traits>::parse_perl_verb()
  2694. {
  2695. if(++m_position == m_end)
  2696. {
  2697. // Rewind to start of (* sequence:
  2698. --m_position;
  2699. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2700. fail(regex_constants::error_perl_extension, m_position - m_base);
  2701. return false;
  2702. }
  2703. switch(*m_position)
  2704. {
  2705. case 'F':
  2706. if(++m_position == m_end)
  2707. {
  2708. // Rewind to start of (* sequence:
  2709. --m_position;
  2710. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2711. fail(regex_constants::error_perl_extension, m_position - m_base);
  2712. return false;
  2713. }
  2714. if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
  2715. {
  2716. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2717. {
  2718. // Rewind to start of (* sequence:
  2719. --m_position;
  2720. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2721. fail(regex_constants::error_perl_extension, m_position - m_base);
  2722. return false;
  2723. }
  2724. ++m_position;
  2725. this->append_state(syntax_element_fail);
  2726. return true;
  2727. }
  2728. break;
  2729. case 'A':
  2730. if(++m_position == m_end)
  2731. {
  2732. // Rewind to start of (* sequence:
  2733. --m_position;
  2734. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2735. fail(regex_constants::error_perl_extension, m_position - m_base);
  2736. return false;
  2737. }
  2738. if(match_verb("CCEPT"))
  2739. {
  2740. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2741. {
  2742. // Rewind to start of (* sequence:
  2743. --m_position;
  2744. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2745. fail(regex_constants::error_perl_extension, m_position - m_base);
  2746. return false;
  2747. }
  2748. ++m_position;
  2749. this->append_state(syntax_element_accept);
  2750. return true;
  2751. }
  2752. break;
  2753. case 'C':
  2754. if(++m_position == m_end)
  2755. {
  2756. // Rewind to start of (* sequence:
  2757. --m_position;
  2758. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2759. fail(regex_constants::error_perl_extension, m_position - m_base);
  2760. return false;
  2761. }
  2762. if(match_verb("OMMIT"))
  2763. {
  2764. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2765. {
  2766. // Rewind to start of (* sequence:
  2767. --m_position;
  2768. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2769. fail(regex_constants::error_perl_extension, m_position - m_base);
  2770. return false;
  2771. }
  2772. ++m_position;
  2773. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
  2774. this->m_pdata->m_disable_match_any = true;
  2775. return true;
  2776. }
  2777. break;
  2778. case 'P':
  2779. if(++m_position == m_end)
  2780. {
  2781. // Rewind to start of (* sequence:
  2782. --m_position;
  2783. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2784. fail(regex_constants::error_perl_extension, m_position - m_base);
  2785. return false;
  2786. }
  2787. if(match_verb("RUNE"))
  2788. {
  2789. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2790. {
  2791. // Rewind to start of (* sequence:
  2792. --m_position;
  2793. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2794. fail(regex_constants::error_perl_extension, m_position - m_base);
  2795. return false;
  2796. }
  2797. ++m_position;
  2798. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
  2799. this->m_pdata->m_disable_match_any = true;
  2800. return true;
  2801. }
  2802. break;
  2803. case 'S':
  2804. if(++m_position == m_end)
  2805. {
  2806. // Rewind to start of (* sequence:
  2807. --m_position;
  2808. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2809. fail(regex_constants::error_perl_extension, m_position - m_base);
  2810. return false;
  2811. }
  2812. if(match_verb("KIP"))
  2813. {
  2814. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2815. {
  2816. // Rewind to start of (* sequence:
  2817. --m_position;
  2818. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2819. fail(regex_constants::error_perl_extension, m_position - m_base);
  2820. return false;
  2821. }
  2822. ++m_position;
  2823. static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
  2824. this->m_pdata->m_disable_match_any = true;
  2825. return true;
  2826. }
  2827. break;
  2828. case 'T':
  2829. if(++m_position == m_end)
  2830. {
  2831. // Rewind to start of (* sequence:
  2832. --m_position;
  2833. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2834. fail(regex_constants::error_perl_extension, m_position - m_base);
  2835. return false;
  2836. }
  2837. if(match_verb("HEN"))
  2838. {
  2839. if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
  2840. {
  2841. // Rewind to start of (* sequence:
  2842. --m_position;
  2843. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2844. fail(regex_constants::error_perl_extension, m_position - m_base);
  2845. return false;
  2846. }
  2847. ++m_position;
  2848. this->append_state(syntax_element_then);
  2849. this->m_pdata->m_disable_match_any = true;
  2850. return true;
  2851. }
  2852. break;
  2853. }
  2854. // Rewind to start of (* sequence:
  2855. --m_position;
  2856. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2857. fail(regex_constants::error_perl_extension, m_position - m_base);
  2858. return false;
  2859. }
  2860. #ifdef BOOST_REGEX_MSVC
  2861. # pragma warning(pop)
  2862. #endif
  2863. template <class charT, class traits>
  2864. bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
  2865. {
  2866. //
  2867. // parses an emacs style \sx or \Sx construct.
  2868. //
  2869. if(++m_position == m_end)
  2870. {
  2871. // Rewind to start of sequence:
  2872. --m_position;
  2873. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
  2874. fail(regex_constants::error_escape, m_position - m_base);
  2875. return false;
  2876. }
  2877. basic_char_set<charT, traits> char_set;
  2878. if(negate)
  2879. char_set.negate();
  2880. static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
  2881. switch(*m_position)
  2882. {
  2883. case 's':
  2884. case ' ':
  2885. char_set.add_class(this->m_mask_space);
  2886. break;
  2887. case 'w':
  2888. char_set.add_class(this->m_word_mask);
  2889. break;
  2890. case '_':
  2891. char_set.add_single(digraph<charT>(charT('$')));
  2892. char_set.add_single(digraph<charT>(charT('&')));
  2893. char_set.add_single(digraph<charT>(charT('*')));
  2894. char_set.add_single(digraph<charT>(charT('+')));
  2895. char_set.add_single(digraph<charT>(charT('-')));
  2896. char_set.add_single(digraph<charT>(charT('_')));
  2897. char_set.add_single(digraph<charT>(charT('<')));
  2898. char_set.add_single(digraph<charT>(charT('>')));
  2899. break;
  2900. case '.':
  2901. char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
  2902. break;
  2903. case '(':
  2904. char_set.add_single(digraph<charT>(charT('(')));
  2905. char_set.add_single(digraph<charT>(charT('[')));
  2906. char_set.add_single(digraph<charT>(charT('{')));
  2907. break;
  2908. case ')':
  2909. char_set.add_single(digraph<charT>(charT(')')));
  2910. char_set.add_single(digraph<charT>(charT(']')));
  2911. char_set.add_single(digraph<charT>(charT('}')));
  2912. break;
  2913. case '"':
  2914. char_set.add_single(digraph<charT>(charT('"')));
  2915. char_set.add_single(digraph<charT>(charT('\'')));
  2916. char_set.add_single(digraph<charT>(charT('`')));
  2917. break;
  2918. case '\'':
  2919. char_set.add_single(digraph<charT>(charT('\'')));
  2920. char_set.add_single(digraph<charT>(charT(',')));
  2921. char_set.add_single(digraph<charT>(charT('#')));
  2922. break;
  2923. case '<':
  2924. char_set.add_single(digraph<charT>(charT(';')));
  2925. break;
  2926. case '>':
  2927. char_set.add_single(digraph<charT>(charT('\n')));
  2928. char_set.add_single(digraph<charT>(charT('\f')));
  2929. break;
  2930. default:
  2931. fail(regex_constants::error_ctype, m_position - m_base);
  2932. return false;
  2933. }
  2934. if(0 == this->append_set(char_set))
  2935. {
  2936. fail(regex_constants::error_ctype, m_position - m_base);
  2937. return false;
  2938. }
  2939. ++m_position;
  2940. return true;
  2941. }
  2942. template <class charT, class traits>
  2943. regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
  2944. {
  2945. // we have a (?imsx-imsx) group, convert it into a set of flags:
  2946. regex_constants::syntax_option_type f = this->flags();
  2947. bool breakout = false;
  2948. do
  2949. {
  2950. switch(*m_position)
  2951. {
  2952. case 's':
  2953. f |= regex_constants::mod_s;
  2954. f &= ~regex_constants::no_mod_s;
  2955. break;
  2956. case 'm':
  2957. f &= ~regex_constants::no_mod_m;
  2958. break;
  2959. case 'i':
  2960. f |= regex_constants::icase;
  2961. break;
  2962. case 'x':
  2963. f |= regex_constants::mod_x;
  2964. break;
  2965. default:
  2966. breakout = true;
  2967. continue;
  2968. }
  2969. if(++m_position == m_end)
  2970. {
  2971. // Rewind to start of (? sequence:
  2972. --m_position;
  2973. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2974. fail(regex_constants::error_paren, m_position - m_base);
  2975. return false;
  2976. }
  2977. }
  2978. while(!breakout);
  2979. breakout = false;
  2980. if(*m_position == static_cast<charT>('-'))
  2981. {
  2982. if(++m_position == m_end)
  2983. {
  2984. // Rewind to start of (? sequence:
  2985. --m_position;
  2986. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  2987. fail(regex_constants::error_paren, m_position - m_base);
  2988. return false;
  2989. }
  2990. do
  2991. {
  2992. switch(*m_position)
  2993. {
  2994. case 's':
  2995. f &= ~regex_constants::mod_s;
  2996. f |= regex_constants::no_mod_s;
  2997. break;
  2998. case 'm':
  2999. f |= regex_constants::no_mod_m;
  3000. break;
  3001. case 'i':
  3002. f &= ~regex_constants::icase;
  3003. break;
  3004. case 'x':
  3005. f &= ~regex_constants::mod_x;
  3006. break;
  3007. default:
  3008. breakout = true;
  3009. continue;
  3010. }
  3011. if(++m_position == m_end)
  3012. {
  3013. // Rewind to start of (? sequence:
  3014. --m_position;
  3015. while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
  3016. fail(regex_constants::error_paren, m_position - m_base);
  3017. return false;
  3018. }
  3019. }
  3020. while(!breakout);
  3021. }
  3022. return f;
  3023. }
  3024. template <class charT, class traits>
  3025. bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
  3026. {
  3027. //
  3028. // If we didn't actually add any states after the last
  3029. // alternative then that's an error:
  3030. //
  3031. if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
  3032. && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)
  3033. &&
  3034. !(
  3035. ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
  3036. &&
  3037. ((this->flags() & regbase::no_empty_expressions) == 0)
  3038. )
  3039. )
  3040. {
  3041. fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
  3042. return false;
  3043. }
  3044. //
  3045. // Fix up our alternatives:
  3046. //
  3047. while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start))
  3048. {
  3049. //
  3050. // fix up the jump to point to the end of the states
  3051. // that we've just added:
  3052. //
  3053. std::ptrdiff_t jump_offset = m_alt_jumps.back();
  3054. m_alt_jumps.pop_back();
  3055. this->m_pdata->m_data.align();
  3056. re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
  3057. if (jmp->type != syntax_element_jump)
  3058. {
  3059. // Something really bad happened, this used to be an assert,
  3060. // but we'll make it an error just in case we should ever get here.
  3061. fail(regex_constants::error_unknown, this->m_position - this->m_base, "Internal logic failed while compiling the expression, probably you added a repeat to something non-repeatable!");
  3062. return false;
  3063. }
  3064. jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
  3065. }
  3066. return true;
  3067. }
  3068. #ifdef BOOST_REGEX_MSVC
  3069. #pragma warning(pop)
  3070. #endif
  3071. } // namespace BOOST_REGEX_DETAIL_NS
  3072. } // namespace boost
  3073. #endif