basic_parser_impl.hpp 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_IMPL_HPP
  11. #define BOOST_JSON_BASIC_PARSER_IMPL_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/basic_parser.hpp>
  14. #include <boost/json/error.hpp>
  15. #include <boost/json/detail/buffer.hpp>
  16. #include <boost/json/detail/charconv/from_chars.hpp>
  17. #include <boost/json/detail/sse2.hpp>
  18. #include <boost/mp11/algorithm.hpp>
  19. #include <boost/mp11/integral.hpp>
  20. #include <cmath>
  21. #include <limits>
  22. #include <cstring>
  23. #ifdef _MSC_VER
  24. #pragma warning(push)
  25. #pragma warning(disable: 4702) // unreachable code
  26. #pragma warning(disable: 4127) // conditional expression is constant
  27. #endif
  28. /* This file must be manually included to get the
  29. function template definitions for basic_parser.
  30. */
  31. /* Reference:
  32. https://www.json.org/
  33. RFC 7159: The JavaScript Object Notation (JSON) Data Interchange Format
  34. https://tools.ietf.org/html/rfc7159
  35. https://ampl.com/netlib/fp/dtoa.c
  36. */
  37. #ifndef BOOST_JSON_DOCS
  38. namespace boost {
  39. namespace json {
  40. namespace detail {
  // Returns 10 raised to the integer power `exp` as a double.
  //
  // Exponents in [-308, 308] are served from a lookup table.
  // Exponents above 308 yield positive infinity; exponents
  // below -308 yield 0.0.
  inline
  double
  pow10(int exp) noexcept
  {
      // tab[i] holds 10^(i - 308). The bound is deduced from the
      // initializer list so the array cannot carry a stray
      // zero-initialized slot past 1e+308.
      static double const tab[] = {
          1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301,
          1e-300, 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
          1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281,
          1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
          1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262, 1e-261,
          1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253, 1e-252, 1e-251,
          1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244, 1e-243, 1e-242, 1e-241,
          1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231,
          1e-230, 1e-229, 1e-228, 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221,
          1e-220, 1e-219, 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211,
          1e-210, 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201,
          1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191,
          1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
          1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172, 1e-171,
          1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163, 1e-162, 1e-161,
          1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154, 1e-153, 1e-152, 1e-151,
          1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141,
          1e-140, 1e-139, 1e-138, 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131,
          1e-130, 1e-129, 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121,
          1e-120, 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
          1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101,
          1e-100, 1e-099, 1e-098, 1e-097, 1e-096, 1e-095, 1e-094, 1e-093, 1e-092, 1e-091,
          1e-090, 1e-089, 1e-088, 1e-087, 1e-086, 1e-085, 1e-084, 1e-083, 1e-082, 1e-081,
          1e-080, 1e-079, 1e-078, 1e-077, 1e-076, 1e-075, 1e-074, 1e-073, 1e-072, 1e-071,
          1e-070, 1e-069, 1e-068, 1e-067, 1e-066, 1e-065, 1e-064, 1e-063, 1e-062, 1e-061,
          1e-060, 1e-059, 1e-058, 1e-057, 1e-056, 1e-055, 1e-054, 1e-053, 1e-052, 1e-051,
          1e-050, 1e-049, 1e-048, 1e-047, 1e-046, 1e-045, 1e-044, 1e-043, 1e-042, 1e-041,
          1e-040, 1e-039, 1e-038, 1e-037, 1e-036, 1e-035, 1e-034, 1e-033, 1e-032, 1e-031,
          1e-030, 1e-029, 1e-028, 1e-027, 1e-026, 1e-025, 1e-024, 1e-023, 1e-022, 1e-021,
          1e-020, 1e-019, 1e-018, 1e-017, 1e-016, 1e-015, 1e-014, 1e-013, 1e-012, 1e-011,
          1e-010, 1e-009, 1e-008, 1e-007, 1e-006, 1e-005, 1e-004, 1e-003, 1e-002, 1e-001,
          1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009,
          1e+010, 1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019,
          1e+020, 1e+021, 1e+022, 1e+023, 1e+024, 1e+025, 1e+026, 1e+027, 1e+028, 1e+029,
          1e+030, 1e+031, 1e+032, 1e+033, 1e+034, 1e+035, 1e+036, 1e+037, 1e+038, 1e+039,
          1e+040, 1e+041, 1e+042, 1e+043, 1e+044, 1e+045, 1e+046, 1e+047, 1e+048, 1e+049,
          1e+050, 1e+051, 1e+052, 1e+053, 1e+054, 1e+055, 1e+056, 1e+057, 1e+058, 1e+059,
          1e+060, 1e+061, 1e+062, 1e+063, 1e+064, 1e+065, 1e+066, 1e+067, 1e+068, 1e+069,
          1e+070, 1e+071, 1e+072, 1e+073, 1e+074, 1e+075, 1e+076, 1e+077, 1e+078, 1e+079,
          1e+080, 1e+081, 1e+082, 1e+083, 1e+084, 1e+085, 1e+086, 1e+087, 1e+088, 1e+089,
          1e+090, 1e+091, 1e+092, 1e+093, 1e+094, 1e+095, 1e+096, 1e+097, 1e+098, 1e+099,
          1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109,
          1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119,
          1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129,
          1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139,
          1e+140, 1e+141, 1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149,
          1e+150, 1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159,
          1e+160, 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169,
          1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
          1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, 1e+189,
          1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 1e+198, 1e+199,
          1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, 1e+209,
          1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219,
          1e+220, 1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229,
          1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239,
          1e+240, 1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249,
          1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259,
          1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
          1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279,
          1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, 1e+289,
          1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299,
          1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307, 1e+308 };

      // Compile-time guard: one entry per exponent in [-308, 308].
      static_assert(
          sizeof(tab) / sizeof(tab[0]) == 617,
          "pow10 table must cover exactly 1e-308 through 1e+308");

      if( exp > 308 )
          return std::numeric_limits<double>::infinity();

      // due to the way pow10 is used by dec_to_float,
      // we can afford to return 0.0 here
      if( exp < -308 )
          return 0.0;

      // -308 <= exp <= 308 at this point, so the index is in [0, 616]
      return tab[exp + 308];
  }
  125. inline
  126. double
  127. dec_to_float(
  128. std::uint64_t m,
  129. std::int32_t e,
  130. bool neg) noexcept
  131. {
  132. // convert to double explicitly to silence warnings
  133. double x = static_cast<double>(m);
  134. if(neg)
  135. x = -x;
  136. if(e < -305)
  137. {
  138. x *= 1e-305 ;
  139. e += 305;
  140. }
  141. if(e >= -22 && e < 0)
  142. return x / pow10(-e);
  143. return x * pow10(e);
  144. }
  // True when `c`, read as an unsigned byte, is below 0x20 (32).
  inline
  bool
  is_control(char c) noexcept
  {
      unsigned char const byte = static_cast<unsigned char>(c);
      return byte < 0x20;
  }
  // Returns the numeric value (0-15) of the hexadecimal digit `c`,
  // accepting both letter cases, or -1 when `c` is not a hex digit.
  inline
  int
  hex_digit(unsigned char c) noexcept
  {
      // by Peter Dimov
      int const ch = c;
      if( '0' <= ch && ch <= '9' )
          return ch - '0';

      // clearing bit 0x20 maps 'a'..'f' onto 'A'..'F'
      int const upper = ch & ~0x20;
      if( 'A' <= upper && upper <= 'F' )
          return upper - 'A' + 10;

      return -1;
  }
  // Identifies which literal token is being parsed. The enum is
  // deliberately unscoped: its enumerators are used as integer
  // template arguments (e.g. mp11::mp_int<detail::null_literal>).
  enum json_literal
  {
      resume_literal       = -1,
      null_literal         = 0,
      true_literal         = 1,
      false_literal        = 2,
      infinity_literal     = 3,
      neg_infinity_literal = 4,
      nan_literal          = 5
  };
  173. } // detail
  174. //----------------------------------------------------------
  175. template< class Handler >
  176. template< bool StackEmpty_, char First_ >
  177. struct basic_parser<Handler>::
  178. parse_number_helper
  179. {
  180. basic_parser* parser;
  181. char const* p;
  182. template< std::size_t N >
  183. char const*
  184. operator()( mp11::mp_size_t<N> ) const
  185. {
  186. return parser->parse_number(
  187. p,
  188. std::integral_constant<bool, StackEmpty_>(),
  189. std::integral_constant<char, First_>(),
  190. std::integral_constant<
  191. number_precision, static_cast<number_precision>(N)>() );
  192. }
  193. };
  194. //----------------------------------------------------------
  195. template<class Handler>
  196. void
  197. basic_parser<Handler>::
  198. reserve()
  199. {
  200. if(BOOST_JSON_LIKELY(
  201. ! st_.empty()))
  202. return;
  203. // Reserve the largest stack we need,
  204. // to avoid reallocation during suspend.
  205. st_.reserve(
  206. sizeof(state) + // document parsing state
  207. (sizeof(state) +
  208. sizeof(std::size_t)) * depth() + // array and object state + size
  209. sizeof(state) + // value parsing state
  210. sizeof(std::size_t) + // string size
  211. sizeof(state)); // comment state
  212. }
  213. //----------------------------------------------------------
  214. //
  215. // The sentinel value is returned by parse functions
  216. // to indicate that the parser failed, or suspended.
  217. // this is used as it is distinct from all valid values
  218. // for data in write
  219. template<class Handler>
  220. const char*
  221. basic_parser<Handler>::
  222. sentinel()
  223. {
  224. // the "+1" ensures that the returned pointer is unique even if
  225. // the given input buffer borders on this object
  226. return reinterpret_cast<
  227. const char*>(this) + 1;
  228. }
  229. template<class Handler>
  230. bool
  231. basic_parser<Handler>::
  232. incomplete(
  233. const detail::const_stream_wrapper& cs)
  234. {
  235. return cs.begin() == sentinel();
  236. }
  237. //----------------------------------------------------------
  238. //
  // These functions are declared with the BOOST_NOINLINE
  // attribute to avoid polluting the parser's hot path.
  // They return the sentinel value to indicate suspension
  // or failure.
  243. template<class Handler>
  244. const char*
  245. basic_parser<Handler>::
  246. suspend_or_fail(state st)
  247. {
  248. if(BOOST_JSON_LIKELY(
  249. ! ec_ && more_))
  250. {
  251. // suspend
  252. reserve();
  253. st_.push_unchecked(st);
  254. }
  255. return sentinel();
  256. }
  257. template<class Handler>
  258. const char*
  259. basic_parser<Handler>::
  260. suspend_or_fail(
  261. state st,
  262. std::size_t n)
  263. {
  264. if(BOOST_JSON_LIKELY(
  265. ! ec_ && more_))
  266. {
  267. // suspend
  268. reserve();
  269. st_.push_unchecked(n);
  270. st_.push_unchecked(st);
  271. }
  272. return sentinel();
  273. }
  274. template<class Handler>
  275. const char*
  276. basic_parser<Handler>::
  277. fail(const char* p) noexcept
  278. {
  279. BOOST_ASSERT( p != sentinel() );
  280. end_ = p;
  281. return sentinel();
  282. }
  283. template<class Handler>
  284. const char*
  285. basic_parser<Handler>::
  286. fail(
  287. const char* p,
  288. error ev,
  289. source_location const* loc) noexcept
  290. {
  291. BOOST_ASSERT( p != sentinel() );
  292. end_ = p;
  293. ec_.assign(ev, loc);
  294. return sentinel();
  295. }
  296. template<class Handler>
  297. const char*
  298. basic_parser<Handler>::
  299. maybe_suspend(
  300. const char* p,
  301. state st)
  302. {
  303. if( p != sentinel() )
  304. end_ = p;
  305. if(BOOST_JSON_LIKELY(more_))
  306. {
  307. // suspend
  308. reserve();
  309. st_.push_unchecked(st);
  310. }
  311. return sentinel();
  312. }
  313. template<class Handler>
  314. const char*
  315. basic_parser<Handler>::
  316. maybe_suspend(
  317. const char* p,
  318. state st,
  319. std::size_t n)
  320. {
  321. BOOST_ASSERT( p != sentinel() );
  322. end_ = p;
  323. if(BOOST_JSON_LIKELY(more_))
  324. {
  325. // suspend
  326. reserve();
  327. st_.push_unchecked(n);
  328. st_.push_unchecked(st);
  329. }
  330. return sentinel();
  331. }
  332. template<class Handler>
  333. const char*
  334. basic_parser<Handler>::
  335. maybe_suspend(
  336. const char* p,
  337. state st,
  338. const number& num)
  339. {
  340. BOOST_ASSERT( p != sentinel() );
  341. end_ = p;
  342. if(BOOST_JSON_LIKELY(more_))
  343. {
  344. // suspend
  345. num_ = num;
  346. reserve();
  347. st_.push_unchecked(st);;
  348. }
  349. return sentinel();
  350. }
  351. template<class Handler>
  352. const char*
  353. basic_parser<Handler>::
  354. suspend(
  355. const char* p,
  356. state st)
  357. {
  358. BOOST_ASSERT( p != sentinel() );
  359. end_ = p;
  360. // suspend
  361. reserve();
  362. st_.push_unchecked(st);
  363. return sentinel();
  364. }
  365. template<class Handler>
  366. const char*
  367. basic_parser<Handler>::
  368. suspend(
  369. const char* p,
  370. state st,
  371. const number& num)
  372. {
  373. BOOST_ASSERT( p != sentinel() );
  374. end_ = p;
  375. // suspend
  376. num_ = num;
  377. reserve();
  378. st_.push_unchecked(st);
  379. return sentinel();
  380. }
  381. template<class Handler>
  382. template<
  383. bool StackEmpty_/*,
  384. bool Terminal_*/>
  385. const char*
  386. basic_parser<Handler>::
  387. parse_comment(const char* p,
  388. std::integral_constant<bool, StackEmpty_> stack_empty,
  389. /*std::integral_constant<bool, Terminal_>*/ bool terminal)
  390. {
  391. detail::const_stream_wrapper cs(p, end_);
  392. const char* start = cs.begin();
  393. std::size_t remain;
  394. if(! stack_empty && ! st_.empty())
  395. {
  396. state st;
  397. st_.pop(st);
  398. switch(st)
  399. {
  400. default: BOOST_JSON_UNREACHABLE();
  401. case state::com1: goto do_com1;
  402. case state::com2: goto do_com2;
  403. case state::com3: goto do_com3;
  404. case state::com4: goto do_com4;
  405. }
  406. }
  407. BOOST_ASSERT(*cs == '/');
  408. ++cs;
  409. do_com1:
  410. if(BOOST_JSON_UNLIKELY(! cs))
  411. return maybe_suspend(cs.begin(), state::com1);
  412. switch(*cs)
  413. {
  414. default:
  415. {
  416. BOOST_STATIC_CONSTEXPR source_location loc
  417. = BOOST_CURRENT_LOCATION;
  418. return fail(cs.begin(), error::syntax, &loc);
  419. }
  420. case '/':
  421. ++cs;
  422. do_com2:
  423. // KRYSTIAN TODO: this is a mess, we have to fix this
  424. remain = cs.remain();
  425. cs = remain ? static_cast<const char*>(
  426. std::memchr(cs.begin(), '\n', remain)) : sentinel();
  427. if(! cs.begin())
  428. cs = sentinel();
  429. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  430. {
  431. // if the doc does not terminate
  432. // with a newline, treat it as the
  433. // end of the comment
  434. if(terminal && ! more_)
  435. {
  436. if(BOOST_JSON_UNLIKELY(! h_.on_comment(
  437. {start, cs.remain(start)}, ec_)))
  438. return fail(cs.end());
  439. return cs.end();
  440. }
  441. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  442. {start, cs.remain(start)}, ec_)))
  443. return fail(cs.end());
  444. if(terminal)
  445. return suspend(cs.end(), state::com2);
  446. return maybe_suspend(cs.end(), state::com2);
  447. }
  448. break;
  449. case '*':
  450. do
  451. {
  452. ++cs;
  453. do_com3:
  454. // KRYSTIAN TODO: this is a mess, we have to fix this
  455. remain = cs.remain();
  456. cs = remain ? static_cast<const char*>(
  457. std::memchr(cs.begin(), '*', remain)) : sentinel();
  458. if(! cs.begin())
  459. cs = sentinel();
  460. // stopped inside a c comment
  461. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  462. {
  463. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  464. {start, cs.remain(start)}, ec_)))
  465. return fail(cs.end());
  466. return maybe_suspend(cs.end(), state::com3);
  467. }
  468. // found a asterisk, check if the next char is a slash
  469. ++cs;
  470. do_com4:
  471. if(BOOST_JSON_UNLIKELY(! cs))
  472. {
  473. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  474. {start, cs.used(start)}, ec_)))
  475. return fail(cs.begin());
  476. return maybe_suspend(cs.begin(), state::com4);
  477. }
  478. }
  479. while(*cs != '/');
  480. }
  481. ++cs;
  482. if(BOOST_JSON_UNLIKELY(! h_.on_comment(
  483. {start, cs.used(start)}, ec_)))
  484. return fail(cs.begin());
  485. return cs.begin();
  486. }
  487. template<class Handler>
  488. template<bool StackEmpty_>
  489. const char*
  490. basic_parser<Handler>::
  491. parse_document(const char* p,
  492. std::integral_constant<bool, StackEmpty_> stack_empty)
  493. {
  494. detail::const_stream_wrapper cs(p, end_);
  495. if(! stack_empty && ! st_.empty())
  496. {
  497. state st;
  498. st_.peek(st);
  499. switch(st)
  500. {
  501. default: goto do_doc2;
  502. case state::doc1:
  503. st_.pop(st);
  504. goto do_doc1;
  505. case state::doc3:
  506. st_.pop(st);
  507. goto do_doc3;
  508. case state::com1: case state::com2:
  509. case state::com3: case state::com4:
  510. goto do_doc4;
  511. }
  512. }
  513. do_doc1:
  514. cs = detail::count_whitespace(cs.begin(), cs.end());
  515. if(BOOST_JSON_UNLIKELY(! cs))
  516. return maybe_suspend(cs.begin(), state::doc1);
  517. do_doc2:
  518. switch(+opt_.allow_comments |
  519. (opt_.allow_trailing_commas << 1) |
  520. (opt_.allow_invalid_utf8 << 2))
  521. {
  522. // no extensions
  523. default:
  524. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::false_type(), std::false_type(), opt_.allow_invalid_utf16);
  525. break;
  526. // comments
  527. case 1:
  528. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::false_type(), std::false_type(), opt_.allow_invalid_utf16);
  529. break;
  530. // trailing
  531. case 2:
  532. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::true_type(), std::false_type(), opt_.allow_invalid_utf16);
  533. break;
  534. // comments & trailing
  535. case 3:
  536. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::true_type(), std::false_type(), opt_.allow_invalid_utf16);
  537. break;
  538. // skip validation
  539. case 4:
  540. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::false_type(), std::true_type(), opt_.allow_invalid_utf16);
  541. break;
  542. // comments & skip validation
  543. case 5:
  544. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::false_type(), std::true_type(), opt_.allow_invalid_utf16);
  545. break;
  546. // trailing & skip validation
  547. case 6:
  548. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::true_type(), std::true_type(), opt_.allow_invalid_utf16);
  549. break;
  550. // comments & trailing & skip validation
  551. case 7:
  552. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::true_type(), std::true_type(), opt_.allow_invalid_utf16);
  553. break;
  554. }
  555. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  556. // the appropriate state has already been pushed into stack
  557. return sentinel();
  558. do_doc3:
  559. cs = detail::count_whitespace(cs.begin(), cs.end());
  560. if(BOOST_JSON_UNLIKELY(! cs))
  561. {
  562. if(more_)
  563. return suspend(cs.begin(), state::doc3);
  564. }
  565. else if(opt_.allow_comments && *cs == '/')
  566. {
  567. do_doc4:
  568. cs = parse_comment(cs.begin(), stack_empty, std::true_type());
  569. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  570. return sentinel();
  571. goto do_doc3;
  572. }
  573. return cs.begin();
  574. }
  575. template<class Handler>
  576. template<
  577. bool StackEmpty_,
  578. bool AllowComments_/*,
  579. bool AllowTrailing_,
  580. bool AllowBadUTF8_*/>
  581. const char*
  582. basic_parser<Handler>::
  583. parse_value(const char* p,
  584. std::integral_constant<bool, StackEmpty_> stack_empty,
  585. std::integral_constant<bool, AllowComments_> allow_comments,
  586. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  587. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  588. bool allow_bad_utf16)
  589. {
  590. if(stack_empty || st_.empty())
  591. {
  592. loop:
  593. switch(*p)
  594. {
  595. case '0':
  596. return mp11::mp_with_index<3>(
  597. static_cast<unsigned char>(opt_.numbers),
  598. parse_number_helper<true, '0'>{ this, p });
  599. case '-':
  600. return mp11::mp_with_index<3>(
  601. static_cast<unsigned char>(opt_.numbers),
  602. parse_number_helper<true, '-'>{ this, p });
  603. case '1': case '2': case '3':
  604. case '4': case '5': case '6':
  605. case '7': case '8': case '9':
  606. return mp11::mp_with_index<3>(
  607. static_cast<unsigned char>(opt_.numbers),
  608. parse_number_helper<true, '+'>{ this, p });
  609. case 'n':
  610. return parse_literal( p, mp11::mp_int<detail::null_literal>() );
  611. case 't':
  612. return parse_literal( p, mp11::mp_int<detail::true_literal>() );
  613. case 'f':
  614. return parse_literal( p, mp11::mp_int<detail::false_literal>() );
  615. case 'I':
  616. if( !opt_.allow_infinity_and_nan )
  617. {
  618. BOOST_STATIC_CONSTEXPR source_location loc
  619. = BOOST_CURRENT_LOCATION;
  620. return fail(p, error::syntax, &loc);
  621. }
  622. return parse_literal( p, mp11::mp_int<detail::infinity_literal>() );
  623. case 'N':
  624. if( !opt_.allow_infinity_and_nan )
  625. {
  626. BOOST_STATIC_CONSTEXPR source_location loc
  627. = BOOST_CURRENT_LOCATION;
  628. return fail(p, error::syntax, &loc);
  629. }
  630. return parse_literal( p, mp11::mp_int<detail::nan_literal>() );
  631. case '"':
  632. return parse_string(p, std::true_type(), std::false_type(), allow_bad_utf8, allow_bad_utf16);
  633. case '[':
  634. return parse_array(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  635. case '{':
  636. return parse_object(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  637. case '/':
  638. if(! allow_comments)
  639. {
  640. BOOST_STATIC_CONSTEXPR source_location loc
  641. = BOOST_CURRENT_LOCATION;
  642. return fail(p, error::syntax, &loc);
  643. }
  644. p = parse_comment(p, stack_empty, std::false_type());
  645. // KRYSTIAN NOTE: incomplete takes const_stream, we either
  646. // can add an overload, change the existing one to take a pointer,
  647. // or just leave it as is
  648. if(BOOST_JSON_UNLIKELY(p == sentinel()))
  649. return maybe_suspend(p, state::val2);
  650. // intentional fallthrough
  651. case ' ':
  652. case '\t':
  653. case '\n':
  654. case '\r':
  655. p = detail::count_whitespace(p, end_);
  656. if(BOOST_JSON_UNLIKELY(p == end_))
  657. return maybe_suspend(p, state::val1);
  658. goto loop;
  659. default:
  660. {
  661. BOOST_STATIC_CONSTEXPR source_location loc
  662. = BOOST_CURRENT_LOCATION;
  663. return fail(p, error::syntax, &loc);
  664. }
  665. }
  666. }
  667. return resume_value(p, allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  668. }
  669. template<class Handler>
  670. template<
  671. bool AllowComments_/*,
  672. bool AllowTrailing_,
  673. bool AllowBadUTF8_*/>
  674. const char*
  675. basic_parser<Handler>::
  676. resume_value(const char* p,
  677. std::integral_constant<bool, AllowComments_> allow_comments,
  678. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  679. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  680. bool allow_bad_utf16)
  681. {
  682. state st;
  683. st_.peek(st);
  684. switch(st)
  685. {
  686. default: BOOST_JSON_UNREACHABLE();
  687. case state::lit1:
  688. return parse_literal(p, mp11::mp_int<detail::resume_literal>() );
  689. case state::str1: case state::str2:
  690. case state::str8:
  691. return parse_string(p, std::false_type(), std::false_type(), allow_bad_utf8, allow_bad_utf16);
  692. case state::arr1: case state::arr2:
  693. case state::arr3: case state::arr4:
  694. case state::arr5: case state::arr6:
  695. return parse_array(p, std::false_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  696. case state::obj1: case state::obj2:
  697. case state::obj3: case state::obj4:
  698. case state::obj5: case state::obj6:
  699. case state::obj7: case state::obj8:
  700. case state::obj9: case state::obj10:
  701. case state::obj11:
  702. return parse_object(p, std::false_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  703. case state::num1: case state::num2:
  704. case state::num3: case state::num4:
  705. case state::num5: case state::num6:
  706. case state::num7: case state::num8:
  707. case state::exp1: case state::exp2:
  708. case state::exp3:
  709. return mp11::mp_with_index<3>(
  710. static_cast<unsigned char>(opt_.numbers),
  711. parse_number_helper<false, 0>{ this, p });
  712. // KRYSTIAN NOTE: these are special cases
  713. case state::val1:
  714. {
  715. st_.pop(st);
  716. BOOST_ASSERT(st_.empty());
  717. p = detail::count_whitespace(p, end_);
  718. if(BOOST_JSON_UNLIKELY(p == end_))
  719. return maybe_suspend(p, state::val1);
  720. return parse_value(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  721. }
  722. case state::val2:
  723. {
  724. st_.pop(st);
  725. p = parse_comment(p, std::false_type(), std::false_type());
  726. if(BOOST_JSON_UNLIKELY(p == sentinel()))
  727. return maybe_suspend(p, state::val2);
  728. if(BOOST_JSON_UNLIKELY( p == end_ ))
  729. return maybe_suspend(p, state::val3);
  730. BOOST_ASSERT(st_.empty());
  731. return parse_value(p, std::true_type(), std::true_type(), allow_trailing, allow_bad_utf8, allow_bad_utf16);
  732. }
  733. case state::val3:
  734. {
  735. st_.pop(st);
  736. return parse_value(p, std::true_type(), std::true_type(), allow_trailing, allow_bad_utf8, allow_bad_utf16);
  737. }
  738. }
  739. }
  740. template<class Handler>
  741. template<int Literal>
  742. const char*
  743. basic_parser<Handler>::
  744. parse_literal(const char* p,
  745. std::integral_constant<int, Literal> literal)
  746. {
  747. constexpr char const* literals[] = {
  748. "null",
  749. "true",
  750. "false",
  751. "Infinity",
  752. "-Infinity",
  753. "NaN",
  754. };
  755. constexpr std::size_t literal_sizes[] = {
  756. 4,
  757. 4,
  758. 5,
  759. 8,
  760. 9,
  761. 3,
  762. };
  763. std::size_t cur_lit;
  764. std::size_t offset;
  765. detail::const_stream_wrapper cs(p, end_);
  766. BOOST_IF_CONSTEXPR( literal != detail::resume_literal )
  767. {
  768. BOOST_ASSERT( literal >= 0 );
  769. if(BOOST_JSON_LIKELY( cs.remain() >= literal_sizes[literal] ))
  770. {
  771. int const cmp = std::memcmp(
  772. cs.begin(), literals[literal], literal_sizes[literal] );
  773. if( cmp != 0 )
  774. {
  775. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  776. return fail(cs.begin(), error::syntax, &loc);
  777. }
  778. BOOST_IF_CONSTEXPR( literal == detail::null_literal )
  779. {
  780. if(BOOST_JSON_UNLIKELY(
  781. ! h_.on_null(ec_)))
  782. return fail(cs.begin());
  783. }
  784. else BOOST_IF_CONSTEXPR( literal == detail::true_literal )
  785. {
  786. if(BOOST_JSON_UNLIKELY(
  787. ! h_.on_bool(true, ec_)))
  788. return fail(cs.begin());
  789. }
  790. else BOOST_IF_CONSTEXPR( literal == detail::false_literal )
  791. {
  792. if(BOOST_JSON_UNLIKELY(
  793. ! h_.on_bool(false, ec_)))
  794. return fail(cs.begin());
  795. }
  796. else BOOST_IF_CONSTEXPR( literal == detail::infinity_literal )
  797. {
  798. if(BOOST_JSON_UNLIKELY(
  799. ! h_.on_double(
  800. std::numeric_limits<double>::infinity(),
  801. string_view(
  802. literals[detail::infinity_literal],
  803. literal_sizes[detail::infinity_literal]),
  804. ec_)))
  805. return fail(cs.begin());
  806. }
  807. else BOOST_IF_CONSTEXPR( literal == detail::neg_infinity_literal )
  808. {
  809. if(BOOST_JSON_UNLIKELY(
  810. ! h_.on_double(
  811. -std::numeric_limits<double>::infinity(),
  812. string_view(
  813. literals[detail::neg_infinity_literal],
  814. literal_sizes[detail::neg_infinity_literal]),
  815. ec_)))
  816. return fail(cs.begin());
  817. }
  818. else BOOST_IF_CONSTEXPR( literal == detail::nan_literal )
  819. {
  820. if(BOOST_JSON_UNLIKELY(
  821. ! h_.on_double(
  822. std::numeric_limits<double>::quiet_NaN(),
  823. string_view(
  824. literals[detail::nan_literal],
  825. literal_sizes[detail::nan_literal]),
  826. ec_)))
  827. return fail(cs.begin());
  828. }
  829. else
  830. {
  831. BOOST_JSON_UNREACHABLE();
  832. }
  833. cs += literal_sizes[literal];
  834. return cs.begin();
  835. }
  836. offset = 0;
  837. cur_lit = literal;
  838. }
  839. else
  840. {
  841. state st;
  842. st_.pop(st);
  843. BOOST_ASSERT( st == state::lit1 );
  844. cur_lit = cur_lit_;
  845. offset = lit_offset_;
  846. }
  847. std::size_t const size = (std::min)(
  848. literal_sizes[cur_lit] - offset, cs.remain() );
  849. int cmp = 0;
  850. if(BOOST_JSON_LIKELY( cs.begin() ))
  851. cmp = std::memcmp( cs.begin(), literals[cur_lit] + offset, size );
  852. if( cmp != 0 )
  853. {
  854. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  855. return fail(cs.begin(), error::syntax, &loc);
  856. }
  857. if(BOOST_JSON_UNLIKELY( offset + size < literal_sizes[cur_lit] ))
  858. {
  859. BOOST_ASSERT( cur_lit < 256 );
  860. cur_lit_ = static_cast<unsigned char>( cur_lit );
  861. BOOST_ASSERT( offset + size < 256 );
  862. lit_offset_ = static_cast<unsigned char>( offset + size );
  863. return maybe_suspend(cs.begin() + size, state::lit1);
  864. }
  865. switch( cur_lit )
  866. {
  867. case detail::null_literal:
  868. if(BOOST_JSON_UNLIKELY(
  869. ! h_.on_null(ec_)))
  870. return fail(cs.begin());
  871. break;
  872. case detail::true_literal:
  873. if(BOOST_JSON_UNLIKELY(
  874. ! h_.on_bool(true, ec_)))
  875. return fail(cs.begin());
  876. break;
  877. case detail::false_literal:
  878. if(BOOST_JSON_UNLIKELY(
  879. ! h_.on_bool(false, ec_)))
  880. return fail(cs.begin());
  881. break;
  882. case detail::infinity_literal:
  883. if(BOOST_JSON_UNLIKELY(
  884. ! h_.on_double(
  885. std::numeric_limits<double>::infinity(),
  886. string_view(
  887. literals[detail::infinity_literal],
  888. literal_sizes[detail::infinity_literal]),
  889. ec_)))
  890. return fail(cs.begin());
  891. break;
  892. case detail::neg_infinity_literal:
  893. if(BOOST_JSON_UNLIKELY(
  894. ! h_.on_double(
  895. -std::numeric_limits<double>::infinity(),
  896. string_view(
  897. literals[detail::neg_infinity_literal],
  898. literal_sizes[detail::neg_infinity_literal]),
  899. ec_)))
  900. return fail(cs.begin());
  901. break;
  902. case detail::nan_literal:
  903. if(BOOST_JSON_UNLIKELY(
  904. ! h_.on_double(
  905. std::numeric_limits<double>::quiet_NaN(),
  906. string_view(
  907. literals[detail::nan_literal],
  908. literal_sizes[detail::nan_literal]),
  909. ec_)))
  910. return fail(cs.begin());
  911. break;
  912. default: BOOST_JSON_UNREACHABLE();
  913. }
  914. cs += size;
  915. return cs.begin();
  916. }
  917. //----------------------------------------------------------
  918. template<class Handler>
  919. template<bool StackEmpty_, bool IsKey_>
  920. const char*
  921. basic_parser<Handler>::
  922. parse_string(const char* p,
  923. std::integral_constant<bool, StackEmpty_> stack_empty,
  924. std::integral_constant<bool, IsKey_> is_key,
  925. bool allow_bad_utf8,
  926. bool allow_bad_utf16)
  927. {
  928. detail::const_stream_wrapper cs(p, end_);
  929. std::size_t total;
  930. char const* start;
  931. std::size_t size;
  932. if(! stack_empty && ! st_.empty())
  933. {
  934. state st;
  935. st_.pop(st);
  936. st_.pop(total);
  937. switch(st)
  938. {
  939. default: BOOST_JSON_UNREACHABLE();
  940. case state::str2: goto do_str2;
  941. case state::str8: goto do_str8;
  942. case state::str1: break;
  943. }
  944. }
  945. else
  946. {
  947. BOOST_ASSERT(*cs == '\x22'); // '"'
  948. ++cs;
  949. total = 0;
  950. }
  951. do_str1:
  952. start = cs.begin();
  953. cs = allow_bad_utf8?
  954. detail::count_valid<true>(cs.begin(), cs.end()):
  955. detail::count_valid<false>(cs.begin(), cs.end());
  956. size = cs.used(start);
  957. if(is_key)
  958. {
  959. BOOST_ASSERT(total <= Handler::max_key_size);
  960. if(BOOST_JSON_UNLIKELY(size >
  961. Handler::max_key_size - total))
  962. {
  963. BOOST_STATIC_CONSTEXPR source_location loc
  964. = BOOST_CURRENT_LOCATION;
  965. return fail(cs.begin(), error::key_too_large, &loc);
  966. }
  967. }
  968. else
  969. {
  970. BOOST_ASSERT(total <= Handler::max_string_size);
  971. if(BOOST_JSON_UNLIKELY(size >
  972. Handler::max_string_size - total))
  973. {
  974. BOOST_STATIC_CONSTEXPR source_location loc
  975. = BOOST_CURRENT_LOCATION;
  976. return fail(cs.begin(), error::string_too_large, &loc);
  977. }
  978. }
  979. total += size;
  980. if(BOOST_JSON_UNLIKELY(! cs))
  981. {
  982. // call handler if the string isn't empty
  983. if(BOOST_JSON_LIKELY(size))
  984. {
  985. {
  986. bool r = is_key?
  987. h_.on_key_part( {start, size}, total, ec_ ):
  988. h_.on_string_part( {start, size}, total, ec_ );
  989. if(BOOST_JSON_UNLIKELY(!r))
  990. {
  991. return fail(cs.begin());
  992. }
  993. }
  994. }
  995. return maybe_suspend(cs.begin(), state::str1, total);
  996. }
  997. // at this point all valid characters have been skipped, so any remaining
  998. // if there are any more characters, they are either escaped, or incomplete
  999. // utf8, or invalid utf8
  1000. if(BOOST_JSON_UNLIKELY(*cs != '\x22')) // '"'
  1001. {
  1002. // sequence is invalid or incomplete
  1003. if((*cs & 0x80) && !allow_bad_utf8)
  1004. {
  1005. seq_.save(cs.begin(), cs.remain());
  1006. if(BOOST_JSON_UNLIKELY(seq_.complete()))
  1007. {
  1008. BOOST_STATIC_CONSTEXPR source_location loc
  1009. = BOOST_CURRENT_LOCATION;
  1010. return fail(cs.begin(), error::syntax, &loc);
  1011. }
  1012. if(BOOST_JSON_LIKELY(size))
  1013. {
  1014. bool const r = is_key?
  1015. h_.on_key_part( {start, size}, total, ec_ ):
  1016. h_.on_string_part( {start, size}, total, ec_ );
  1017. if(BOOST_JSON_UNLIKELY( !r ))
  1018. return fail( cs.begin() );
  1019. }
  1020. return maybe_suspend(cs.end(), state::str8, total);
  1021. }
  1022. else if(BOOST_JSON_LIKELY(*cs == '\\'))
  1023. {
  1024. // flush unescaped run from input
  1025. if(BOOST_JSON_LIKELY(size))
  1026. {
  1027. bool const r = is_key?
  1028. h_.on_key_part( {start, size}, total, ec_ ):
  1029. h_.on_string_part( {start, size}, total, ec_ );
  1030. if(BOOST_JSON_UNLIKELY( !r ))
  1031. return fail( cs.begin() );
  1032. }
  1033. do_str2:
  1034. cs = parse_escaped(cs.begin(), total, stack_empty, is_key, allow_bad_utf16);
  1035. if(BOOST_JSON_UNLIKELY( incomplete(cs) ))
  1036. return suspend_or_fail(state::str2, total);
  1037. goto do_str1;
  1038. }
  1039. // illegal control
  1040. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1041. return fail(cs.begin(), error::syntax, &loc);
  1042. }
  1043. {
  1044. bool r = is_key?
  1045. h_.on_key( {start, size}, total, ec_ ):
  1046. h_.on_string( {start, size}, total, ec_ );
  1047. if(BOOST_JSON_UNLIKELY(!r))
  1048. {
  1049. return fail(cs.begin());
  1050. }
  1051. }
  1052. ++cs;
  1053. return cs.begin();
  1054. do_str8:
  1055. uint8_t needed = seq_.needed();
  1056. if(BOOST_JSON_UNLIKELY( !seq_.append(cs.begin(), cs.remain()) ))
  1057. return maybe_suspend(cs.end(), state::str8, total);
  1058. if(BOOST_JSON_UNLIKELY( !seq_.valid() ))
  1059. {
  1060. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1061. return fail(cs.begin(), error::syntax, &loc);
  1062. }
  1063. {
  1064. bool const r = is_key?
  1065. h_.on_key_part( {seq_.data(), seq_.length()}, total, ec_ ):
  1066. h_.on_string_part( {seq_.data(), seq_.length()}, total, ec_ );
  1067. if(BOOST_JSON_UNLIKELY( !r ))
  1068. return fail( cs.begin() );
  1069. }
  1070. cs += needed;
  1071. goto do_str1;
  1072. }
  1073. template<class Handler>
  1074. template<bool StackEmpty_>
  1075. const char*
  1076. basic_parser<Handler>::
  1077. parse_escaped(
  1078. const char* p,
  1079. std::size_t& total,
  1080. std::integral_constant<bool, StackEmpty_> stack_empty,
  1081. bool is_key,
  1082. bool allow_bad_utf16)
  1083. {
  1084. constexpr unsigned urc = 0xFFFD; // Unicode replacement character
  1085. auto const ev_too_large = is_key?
  1086. error::key_too_large : error::string_too_large;
  1087. auto const max_size = is_key?
  1088. Handler::max_key_size : Handler::max_string_size;
  1089. int digit;
  1090. //---------------------------------------------------------------
  1091. //
  1092. // To handle escapes, a local temporary buffer accumulates
  1093. // the unescaped result. The algorithm attempts to fill the
  1094. // buffer to capacity before invoking the handler.
  1095. // In some cases the temporary buffer needs to be flushed
  1096. // before it is full:
  1097. // * When the closing double quote is seen
  1098. // * When there in no more input (and more is expected later)
  1099. // A goal of the algorithm is to call the handler as few times
  1100. // as possible. Thus, when the first escape is encountered,
  1101. // the algorithm attempts to fill the temporary buffer first.
  1102. //
  1103. detail::buffer<BOOST_JSON_STACK_BUFFER_SIZE> temp;
  1104. // Unescaped JSON is never larger than its escaped version.
  1105. // To efficiently process only what will fit in the temporary buffer,
  1106. // the size of the input stream is temporarily "clipped" to the size
  1107. // of the temporary buffer.
  1108. // handle escaped character
  1109. detail::clipped_const_stream cs(p, end_);
  1110. cs.clip(temp.max_size());
  1111. if(! stack_empty && ! st_.empty())
  1112. {
  1113. state st;
  1114. st_.pop(st);
  1115. switch(st)
  1116. {
  1117. default: BOOST_JSON_UNREACHABLE();
  1118. case state::str3: goto do_str3;
  1119. case state::str4: goto do_str4;
  1120. case state::str5: goto do_str5;
  1121. case state::str6: goto do_str6;
  1122. case state::str7: goto do_str7;
  1123. case state::sur1: goto do_sur1;
  1124. case state::sur2: goto do_sur2;
  1125. case state::sur3: goto do_sur3;
  1126. case state::sur4: goto do_sur4;
  1127. case state::sur5: goto do_sur5;
  1128. case state::sur6: goto do_sur6;
  1129. }
  1130. }
  1131. while(true)
  1132. {
  1133. BOOST_ASSERT( temp.capacity() );
  1134. BOOST_ASSERT(*cs == '\\');
  1135. ++cs;
  1136. do_str3:
  1137. if(BOOST_JSON_UNLIKELY(! cs))
  1138. {
  1139. if(BOOST_JSON_LIKELY(! temp.empty()))
  1140. {
  1141. BOOST_ASSERT(total <= max_size);
  1142. if(BOOST_JSON_UNLIKELY(
  1143. temp.size() > max_size - total))
  1144. {
  1145. BOOST_STATIC_CONSTEXPR source_location loc
  1146. = BOOST_CURRENT_LOCATION;
  1147. return fail(cs.begin(), ev_too_large, &loc);
  1148. }
  1149. total += temp.size();
  1150. {
  1151. bool r = is_key
  1152. ? h_.on_key_part(temp.get(), total, ec_)
  1153. : h_.on_string_part(temp.get(), total, ec_);
  1154. if(BOOST_JSON_UNLIKELY(!r))
  1155. {
  1156. return fail(cs.begin());
  1157. }
  1158. }
  1159. temp.clear();
  1160. }
  1161. cs.clip(temp.max_size());
  1162. if(BOOST_JSON_UNLIKELY(! cs))
  1163. return maybe_suspend(cs.begin(), state::str3);
  1164. }
  1165. switch(*cs)
  1166. {
  1167. default:
  1168. {
  1169. BOOST_STATIC_CONSTEXPR source_location loc
  1170. = BOOST_CURRENT_LOCATION;
  1171. return fail(cs.begin(), error::syntax, &loc);
  1172. }
  1173. case '\x22': // '"'
  1174. temp.push_back('\x22');
  1175. ++cs;
  1176. break;
  1177. case '\\':
  1178. temp.push_back('\\');
  1179. ++cs;
  1180. break;
  1181. case '/':
  1182. temp.push_back('/');
  1183. ++cs;
  1184. break;
  1185. case 'b':
  1186. temp.push_back('\x08');
  1187. ++cs;
  1188. break;
  1189. case 'f':
  1190. temp.push_back('\x0c');
  1191. ++cs;
  1192. break;
  1193. case 'n':
  1194. temp.push_back('\x0a');
  1195. ++cs;
  1196. break;
  1197. case 'r':
  1198. temp.push_back('\x0d');
  1199. ++cs;
  1200. break;
  1201. case 't':
  1202. temp.push_back('\x09');
  1203. ++cs;
  1204. break;
  1205. case 'u':
  1206. // utf16 escape
  1207. //
  1208. // fast path only when the buffer
  1209. // is large enough for 2 surrogates
  1210. if(BOOST_JSON_LIKELY(cs.remain() > 10))
  1211. {
  1212. // KRYSTIAN TODO: this could be done
  1213. // with fewer instructions
  1214. digit = detail::load_little_endian<4>(
  1215. cs.begin() + 1);
  1216. int d4 = detail::hex_digit(static_cast<
  1217. unsigned char>(digit >> 24));
  1218. int d3 = detail::hex_digit(static_cast<
  1219. unsigned char>(digit >> 16));
  1220. int d2 = detail::hex_digit(static_cast<
  1221. unsigned char>(digit >> 8));
  1222. int d1 = detail::hex_digit(static_cast<
  1223. unsigned char>(digit));
  1224. if(BOOST_JSON_UNLIKELY(
  1225. (d1 | d2 | d3 | d4) == -1))
  1226. {
  1227. if(d1 != -1)
  1228. ++cs;
  1229. if(d2 != -1)
  1230. ++cs;
  1231. if(d3 != -1)
  1232. ++cs;
  1233. BOOST_STATIC_CONSTEXPR source_location loc
  1234. = BOOST_CURRENT_LOCATION;
  1235. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1236. }
  1237. // 32 bit unicode scalar value
  1238. unsigned u1 =
  1239. (d1 << 12) + (d2 << 8) +
  1240. (d3 << 4) + d4;
  1241. // valid unicode scalar values are
  1242. // [0, D7FF] and [E000, 10FFFF]
  1243. // values within this range are valid utf-8
  1244. // code points and invalid leading surrogates.
  1245. if(BOOST_JSON_LIKELY(
  1246. u1 < 0xd800 || u1 > 0xdfff))
  1247. {
  1248. cs += 5;
  1249. temp.append_utf8(u1);
  1250. break;
  1251. }
  1252. if(BOOST_JSON_UNLIKELY(u1 > 0xdbff))
  1253. {
  1254. // If it's an illegal leading surrogate and
  1255. // the parser does not allow it, return an error.
  1256. if(!allow_bad_utf16)
  1257. {
  1258. BOOST_STATIC_CONSTEXPR source_location loc
  1259. = BOOST_CURRENT_LOCATION;
  1260. return fail(cs.begin(), error::illegal_leading_surrogate,
  1261. &loc);
  1262. }
  1263. // Otherwise, append the Unicode replacement character
  1264. else
  1265. {
  1266. cs += 5;
  1267. temp.append_utf8(urc);
  1268. break;
  1269. }
  1270. }
  1271. cs += 5;
  1272. // KRYSTIAN TODO: this can be a two byte load
  1273. // and a single comparison. We lose error information,
  1274. // but it's faster.
  1275. if(BOOST_JSON_UNLIKELY(*cs != '\\'))
  1276. {
  1277. // If the next character is not a backslash and
  1278. // the parser does not allow it, return a syntax error.
  1279. if(!allow_bad_utf16)
  1280. {
  1281. BOOST_STATIC_CONSTEXPR source_location loc
  1282. = BOOST_CURRENT_LOCATION;
  1283. return fail(cs.begin(), error::syntax, &loc);
  1284. }
  1285. // Otherwise, append the Unicode replacement character since
  1286. // the first code point is a valid leading surrogate
  1287. else
  1288. {
  1289. temp.append_utf8(urc);
  1290. break;
  1291. }
  1292. }
  1293. ++cs;
  1294. if(BOOST_JSON_UNLIKELY(*cs != 'u'))
  1295. {
  1296. if (!allow_bad_utf16)
  1297. {
  1298. BOOST_STATIC_CONSTEXPR source_location loc
  1299. = BOOST_CURRENT_LOCATION;
  1300. return fail(cs.begin(), error::syntax, &loc);
  1301. }
  1302. // Otherwise, append the Unicode replacement character since
  1303. // the first code point is a valid leading surrogate
  1304. else
  1305. {
  1306. temp.append_utf8(urc);
  1307. goto do_str3;
  1308. }
  1309. }
  1310. ++cs;
  1311. digit = detail::load_little_endian<4>(cs.begin());
  1312. d4 = detail::hex_digit(static_cast<
  1313. unsigned char>(digit >> 24));
  1314. d3 = detail::hex_digit(static_cast<
  1315. unsigned char>(digit >> 16));
  1316. d2 = detail::hex_digit(static_cast<
  1317. unsigned char>(digit >> 8));
  1318. d1 = detail::hex_digit(static_cast<
  1319. unsigned char>(digit));
  1320. if(BOOST_JSON_UNLIKELY(
  1321. (d1 | d2 | d3 | d4) == -1))
  1322. {
  1323. if(d1 != -1)
  1324. ++cs;
  1325. if(d2 != -1)
  1326. ++cs;
  1327. if(d3 != -1)
  1328. ++cs;
  1329. BOOST_STATIC_CONSTEXPR source_location loc
  1330. = BOOST_CURRENT_LOCATION;
  1331. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1332. }
  1333. unsigned u2 =
  1334. (d1 << 12) + (d2 << 8) +
  1335. (d3 << 4) + d4;
  1336. // Check if the second code point is a valid trailing surrogate.
  1337. // Valid trailing surrogates are [DC00, DFFF]
  1338. if(BOOST_JSON_UNLIKELY(
  1339. u2 < 0xdc00 || u2 > 0xdfff))
  1340. {
  1341. // If not valid and the parser does not allow it, return an error.
  1342. if(!allow_bad_utf16)
  1343. {
  1344. BOOST_STATIC_CONSTEXPR source_location loc
  1345. = BOOST_CURRENT_LOCATION;
  1346. return fail(cs.begin(), error::illegal_trailing_surrogate,
  1347. &loc);
  1348. }
  1349. // Append the replacement character for the
  1350. // first leading surrogate.
  1351. cs += 4;
  1352. temp.append_utf8(urc);
  1353. // Check if the second code point is a
  1354. // valid unicode scalar value (invalid leading
  1355. // or trailing surrogate)
  1356. if (u2 < 0xd800 || u2 > 0xdbff)
  1357. {
  1358. temp.append_utf8(u2);
  1359. break;
  1360. }
  1361. // If it is a valid leading surrogate
  1362. else
  1363. {
  1364. u1_ = u2;
  1365. goto do_sur1;
  1366. }
  1367. }
  1368. cs += 4;
  1369. // Calculate the Unicode code point from the surrogate pair and
  1370. // append the UTF-8 representation.
  1371. unsigned cp =
  1372. ((u1 - 0xd800) << 10) +
  1373. ((u2 - 0xdc00)) +
  1374. 0x10000;
  1375. // utf-16 surrogate pair
  1376. temp.append_utf8(cp);
  1377. break;
  1378. }
  1379. // flush
  1380. if(BOOST_JSON_LIKELY(! temp.empty()))
  1381. {
  1382. BOOST_ASSERT(total <= max_size);
  1383. if(BOOST_JSON_UNLIKELY(
  1384. temp.size() > max_size - total))
  1385. {
  1386. BOOST_STATIC_CONSTEXPR source_location loc
  1387. = BOOST_CURRENT_LOCATION;
  1388. return fail(cs.begin(), ev_too_large, &loc);
  1389. }
  1390. total += temp.size();
  1391. {
  1392. bool r = is_key
  1393. ? h_.on_key_part(temp.get(), total, ec_)
  1394. : h_.on_string_part(temp.get(), total, ec_);
  1395. if(BOOST_JSON_UNLIKELY(!r))
  1396. {
  1397. return fail(cs.begin());
  1398. }
  1399. }
  1400. temp.clear();
  1401. cs.clip(temp.max_size());
  1402. }
  1403. ++cs;
  1404. // utf-16 escape
  1405. do_str4:
  1406. if(BOOST_JSON_UNLIKELY(! cs))
  1407. return maybe_suspend(cs.begin(), state::str4);
  1408. digit = detail::hex_digit(*cs);
  1409. if(BOOST_JSON_UNLIKELY(digit == -1))
  1410. {
  1411. BOOST_STATIC_CONSTEXPR source_location loc
  1412. = BOOST_CURRENT_LOCATION;
  1413. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1414. }
  1415. ++cs;
  1416. u1_ = digit << 12;
  1417. do_str5:
  1418. if(BOOST_JSON_UNLIKELY(! cs))
  1419. return maybe_suspend(cs.begin(), state::str5);
  1420. digit = detail::hex_digit(*cs);
  1421. if(BOOST_JSON_UNLIKELY(digit == -1))
  1422. {
  1423. BOOST_STATIC_CONSTEXPR source_location loc
  1424. = BOOST_CURRENT_LOCATION;
  1425. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1426. }
  1427. ++cs;
  1428. u1_ += digit << 8;
  1429. do_str6:
  1430. if(BOOST_JSON_UNLIKELY(! cs))
  1431. return maybe_suspend(cs.begin(), state::str6);
  1432. digit = detail::hex_digit(*cs);
  1433. if(BOOST_JSON_UNLIKELY(digit == -1))
  1434. {
  1435. BOOST_STATIC_CONSTEXPR source_location loc
  1436. = BOOST_CURRENT_LOCATION;
  1437. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1438. }
  1439. ++cs;
  1440. u1_ += digit << 4;
  1441. do_str7:
  1442. if(BOOST_JSON_UNLIKELY(! cs))
  1443. return maybe_suspend(cs.begin(), state::str7);
  1444. digit = detail::hex_digit(*cs);
  1445. if(BOOST_JSON_UNLIKELY(digit == -1))
  1446. {
  1447. BOOST_STATIC_CONSTEXPR source_location loc
  1448. = BOOST_CURRENT_LOCATION;
  1449. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1450. }
  1451. ++cs;
  1452. u1_ += digit;
  1453. if(BOOST_JSON_LIKELY(
  1454. u1_ < 0xd800 || u1_ > 0xdfff))
  1455. {
  1456. BOOST_ASSERT(temp.empty());
  1457. // utf-8 codepoint
  1458. temp.append_utf8(u1_);
  1459. break;
  1460. }
  1461. if(BOOST_JSON_UNLIKELY(u1_ > 0xdbff))
  1462. {
  1463. // If it's an illegal leading surrogate and
  1464. // the parser does not allow it, return an error.
  1465. if(!allow_bad_utf16)
  1466. {
  1467. BOOST_STATIC_CONSTEXPR source_location loc
  1468. = BOOST_CURRENT_LOCATION;
  1469. return fail(cs.begin(), error::illegal_leading_surrogate, &loc);
  1470. }
  1471. // Otherwise, append the Unicode replacement character
  1472. else
  1473. {
  1474. BOOST_ASSERT(temp.empty());
  1475. temp.append_utf8(urc);
  1476. break;
  1477. }
  1478. }
  1479. do_sur1:
  1480. if(BOOST_JSON_UNLIKELY(! cs))
  1481. return maybe_suspend(cs.begin(), state::sur1);
  1482. if(BOOST_JSON_UNLIKELY(*cs != '\\'))
  1483. {
  1484. // If the next character is not a backslash and
  1485. // the parser does not allow it, return a syntax error.
  1486. if(!allow_bad_utf16)
  1487. {
  1488. BOOST_STATIC_CONSTEXPR source_location loc
  1489. = BOOST_CURRENT_LOCATION;
  1490. return fail(cs.begin(), error::syntax, &loc);
  1491. }
  1492. // Otherwise, append the Unicode replacement character since
  1493. // the first code point is a valid leading surrogate
  1494. else
  1495. {
  1496. temp.append_utf8(urc);
  1497. break;
  1498. }
  1499. }
  1500. ++cs;
  1501. do_sur2:
  1502. if(BOOST_JSON_UNLIKELY(! cs))
  1503. return maybe_suspend(cs.begin(), state::sur2);
  1504. if(BOOST_JSON_UNLIKELY(*cs != 'u'))
  1505. {
  1506. if (!allow_bad_utf16)
  1507. {
  1508. BOOST_STATIC_CONSTEXPR source_location loc
  1509. = BOOST_CURRENT_LOCATION;
  1510. return fail(cs.begin(), error::syntax, &loc);
  1511. }
  1512. // Otherwise, append the Unicode replacement character since
  1513. // the first code point is a valid leading surrogate
  1514. else
  1515. {
  1516. temp.append_utf8(urc);
  1517. goto do_str3;
  1518. }
  1519. }
  1520. ++cs;
  1521. do_sur3:
  1522. if(BOOST_JSON_UNLIKELY(! cs))
  1523. return maybe_suspend(cs.begin(), state::sur3);
  1524. digit = detail::hex_digit(*cs);
  1525. if(BOOST_JSON_UNLIKELY(digit == -1))
  1526. {
  1527. BOOST_STATIC_CONSTEXPR source_location loc
  1528. = BOOST_CURRENT_LOCATION;
  1529. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1530. }
  1531. ++cs;
  1532. u2_ = digit << 12;
  1533. do_sur4:
  1534. if(BOOST_JSON_UNLIKELY(! cs))
  1535. return maybe_suspend(cs.begin(), state::sur4);
  1536. digit = detail::hex_digit(*cs);
  1537. if(BOOST_JSON_UNLIKELY(digit == -1))
  1538. {
  1539. BOOST_STATIC_CONSTEXPR source_location loc
  1540. = BOOST_CURRENT_LOCATION;
  1541. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1542. }
  1543. ++cs;
  1544. u2_ += digit << 8;
  1545. do_sur5:
  1546. if(BOOST_JSON_UNLIKELY(! cs))
  1547. return maybe_suspend(cs.begin(), state::sur5);
  1548. digit = detail::hex_digit(*cs);
  1549. if(BOOST_JSON_UNLIKELY(digit == -1))
  1550. {
  1551. BOOST_STATIC_CONSTEXPR source_location loc
  1552. = BOOST_CURRENT_LOCATION;
  1553. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1554. }
  1555. ++cs;
  1556. u2_ += digit << 4;
  1557. do_sur6:
  1558. if(BOOST_JSON_UNLIKELY(! cs))
  1559. return maybe_suspend(cs.begin(), state::sur6);
  1560. digit = detail::hex_digit(*cs);
  1561. if(BOOST_JSON_UNLIKELY(digit == -1))
  1562. {
  1563. BOOST_STATIC_CONSTEXPR source_location loc
  1564. = BOOST_CURRENT_LOCATION;
  1565. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1566. }
  1567. ++cs;
  1568. u2_ += digit;
  1569. // Check if the second code point is a valid trailing surrogate.
  1570. // Valid trailing surrogates are [DC00, DFFF]
  1571. if(BOOST_JSON_UNLIKELY(
  1572. u2_ < 0xdc00 || u2_ > 0xdfff))
  1573. {
  1574. // If not valid and the parser does not allow it, return an error.
  1575. if(!allow_bad_utf16)
  1576. {
  1577. BOOST_STATIC_CONSTEXPR source_location loc
  1578. = BOOST_CURRENT_LOCATION;
  1579. return fail(cs.begin(), error::illegal_trailing_surrogate, &loc);
  1580. }
  1581. // Append the replacement character for the
  1582. // first leading surrogate.
  1583. temp.append_utf8(urc);
  1584. // Check if the second code point is a
  1585. // valid unicode scalar value (invalid leading
  1586. // or trailing surrogate)
  1587. if (u2_ < 0xd800 || u2_ > 0xdbff)
  1588. {
  1589. temp.append_utf8(u2_);
  1590. break;
  1591. }
  1592. // If it is a valid leading surrogate
  1593. else
  1594. {
  1595. u1_ = u2_;
  1596. goto do_sur1;
  1597. }
  1598. }
  1599. // Calculate the Unicode code point from the surrogate pair and
  1600. // append the UTF-8 representation.
  1601. unsigned cp =
  1602. ((u1_ - 0xd800) << 10) +
  1603. ((u2_ - 0xdc00)) +
  1604. 0x10000;
  1605. // utf-16 surrogate pair
  1606. temp.append_utf8(cp);
  1607. }
  1608. // flush
  1609. if(BOOST_JSON_UNLIKELY( !cs ) || *cs != '\\')
  1610. break;
  1611. }
  1612. if(BOOST_JSON_LIKELY( temp.size() ))
  1613. {
  1614. BOOST_ASSERT(total <= max_size);
  1615. if(BOOST_JSON_UNLIKELY( temp.size() > max_size - total ))
  1616. {
  1617. BOOST_STATIC_CONSTEXPR source_location loc
  1618. = BOOST_CURRENT_LOCATION;
  1619. return fail(cs.begin(), ev_too_large, &loc);
  1620. }
  1621. total += temp.size();
  1622. bool const r = is_key
  1623. ? h_.on_key_part(temp.get(), total, ec_)
  1624. : h_.on_string_part(temp.get(), total, ec_);
  1625. if(BOOST_JSON_UNLIKELY( !r ))
  1626. return fail( cs.begin() );
  1627. }
  1628. return cs.begin();
  1629. }
  1630. //----------------------------------------------------------
  // Parses one JSON object and reports it to the handler h_.
  //
  // On a fresh (non-resumed) call *p must be '{' (asserted below). The
  // function takes one level from depth_ (failing with error::too_deep when
  // none is left), calls on_object_begin, loops over `key : value` members
  // separated by ',', and finally calls on_object_end with the number of
  // members seen. Keys are parsed by parse_string, values by parse_value.
  //
  // The function is resumable: whenever the input is exhausted, the current
  // state (obj1..obj11) and the running member count are handed to
  // maybe_suspend / suspend_or_fail; a later call that finds a non-empty
  // state stack pops them again and jumps to the matching do_objN label.
  //
  // allow_comments enables '/' comments between tokens, allow_trailing
  // accepts a '}' directly after a ','. allow_bad_utf8 and allow_bad_utf16
  // are only forwarded to parse_string and parse_value.
  //
  // Returns the position just past the closing '}' on success; otherwise the
  // result of fail / maybe_suspend / suspend_or_fail is returned unchanged.
  1631. template<class Handler>
  1632. template<
  1633. bool StackEmpty_,
  1634. bool AllowComments_/*,
  1635. bool AllowTrailing_,
  1636. bool AllowBadUTF8_*/>
  1637. const char*
  1638. basic_parser<Handler>::
  1639. parse_object(const char* p,
  1640. std::integral_constant<bool, StackEmpty_> stack_empty,
  1641. std::integral_constant<bool, AllowComments_> allow_comments,
  1642. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  1643. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  1644. bool allow_bad_utf16)
  1645. {
  1646. detail::const_stream_wrapper cs(p, end_);
  // number of members counted so far; restored from the stack on resume
  1647. std::size_t size;
  1648. if(! stack_empty && ! st_.empty())
  1649. {
  1650. // resume
  // the state is popped first, then the saved member count
  1651. state st;
  1652. st_.pop(st);
  1653. st_.pop(size);
  1654. switch(st)
  1655. {
  1656. default: BOOST_JSON_UNREACHABLE();
  1657. case state::obj1: goto do_obj1;
  1658. case state::obj2: goto do_obj2;
  1659. case state::obj3: goto do_obj3;
  1660. case state::obj4: goto do_obj4;
  1661. case state::obj5: goto do_obj5;
  1662. case state::obj6: goto do_obj6;
  1663. case state::obj7: goto do_obj7;
  1664. case state::obj8: goto do_obj8;
  1665. case state::obj9: goto do_obj9;
  1666. case state::obj10: goto do_obj10;
  1667. case state::obj11: goto do_obj11;
  1668. }
  1669. }
  // fresh parse: the caller positioned us on the opening brace
  1670. BOOST_ASSERT(*cs == '{');
  1671. size = 0;
  // depth_ holds the remaining nesting budget; zero means no level is left
  1672. if(BOOST_JSON_UNLIKELY(! depth_))
  1673. {
  1674. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1675. return fail(cs.begin(), error::too_deep, &loc);
  1676. }
  1677. --depth_;
  1678. if(BOOST_JSON_UNLIKELY(
  1679. ! h_.on_object_begin(ec_)))
  1680. return fail(cs.begin());
  // step over '{'
  1681. ++cs;
  1682. // object:
  1683. // '{' *ws '}'
  1684. // '{' *ws string *ws ':' *ws value *ws *[ ',' *ws string *ws ':' *ws value *ws ] '}'
  // obj1: whitespace after '{', then either '}' or the first key
  1685. do_obj1:
  1686. cs = detail::count_whitespace(cs.begin(), cs.end());
  1687. if(BOOST_JSON_UNLIKELY(! cs))
  1688. return maybe_suspend(cs.begin(), state::obj1, size);
  1689. if(BOOST_JSON_LIKELY(*cs != '}'))
  1690. {
  // '\x22' is the double quote that opens a key
  1691. if(BOOST_JSON_UNLIKELY(*cs != '\x22'))
  1692. {
  1693. if(allow_comments && *cs == '/')
  1694. {
  // obj2: comment before the first key, then rescan from obj1
  1695. do_obj2:
  1696. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1697. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1698. return suspend_or_fail(state::obj2, size);
  1699. goto do_obj1;
  1700. }
  1701. BOOST_STATIC_CONSTEXPR source_location loc
  1702. = BOOST_CURRENT_LOCATION;
  1703. return fail(cs.begin(), error::syntax, &loc);
  1704. }
  // one iteration per member; the count is bumped and checked against
  // Handler::max_object_size before the key is parsed
  1705. loop:
  1706. if(BOOST_JSON_UNLIKELY(++size >
  1707. Handler::max_object_size))
  1708. {
  1709. BOOST_STATIC_CONSTEXPR source_location loc
  1710. = BOOST_CURRENT_LOCATION;
  1711. return fail(cs.begin(), error::object_too_large, &loc);
  1712. }
  // obj3: the key string
  1713. do_obj3:
  1714. cs = parse_string(cs.begin(), stack_empty, std::true_type(), allow_bad_utf8, allow_bad_utf16);
  1715. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1716. return suspend_or_fail(state::obj3, size);
  // obj4: whitespace, then the ':' separator
  1717. do_obj4:
  1718. cs = detail::count_whitespace(cs.begin(), cs.end());
  1719. if(BOOST_JSON_UNLIKELY(! cs))
  1720. return maybe_suspend(cs.begin(), state::obj4, size);
  1721. if(BOOST_JSON_UNLIKELY(*cs != ':'))
  1722. {
  1723. if(allow_comments && *cs == '/')
  1724. {
  // obj5: comment between key and ':', then rescan from obj4
  1725. do_obj5:
  1726. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1727. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1728. return suspend_or_fail(state::obj5, size);
  1729. goto do_obj4;
  1730. }
  1731. BOOST_STATIC_CONSTEXPR source_location loc
  1732. = BOOST_CURRENT_LOCATION;
  1733. return fail(cs.begin(), error::syntax, &loc);
  1734. }
  // step over ':'
  1735. ++cs;
  // obj6: whitespace before the value
  1736. do_obj6:
  1737. cs = detail::count_whitespace(cs.begin(), cs.end());
  1738. if(BOOST_JSON_UNLIKELY(! cs))
  1739. return maybe_suspend(cs.begin(), state::obj6, size);
  // obj7: the member value
  1740. do_obj7:
  1741. cs = parse_value(cs.begin(), stack_empty, allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  1742. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1743. return suspend_or_fail(state::obj7, size);
  // obj8: whitespace after the value, then ',' or '}'
  1744. do_obj8:
  1745. cs = detail::count_whitespace(cs.begin(), cs.end());
  1746. if(BOOST_JSON_UNLIKELY(! cs))
  1747. return maybe_suspend(cs.begin(), state::obj8, size);
  1748. if(BOOST_JSON_LIKELY(*cs == ','))
  1749. {
  1750. ++cs;
  // obj9: whitespace after ',', then the next key (or '}' if a
  // trailing comma is allowed)
  1751. do_obj9:
  1752. cs = detail::count_whitespace(cs.begin(), cs.end());
  1753. if(BOOST_JSON_UNLIKELY(! cs))
  1754. return maybe_suspend(cs.begin(), state::obj9, size);
  1755. // loop for next element
  1756. if(BOOST_JSON_LIKELY(*cs == '\x22'))
  1757. goto loop;
  // anything other than an allowed trailing '}' is a comment or an error
  1758. if(! allow_trailing || *cs != '}')
  1759. {
  1760. if(allow_comments && *cs == '/')
  1761. {
  // obj10: comment after ',', then rescan from obj9
  1762. do_obj10:
  1763. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1764. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1765. return suspend_or_fail(state::obj10, size);
  1766. goto do_obj9;
  1767. }
  1768. BOOST_STATIC_CONSTEXPR source_location loc
  1769. = BOOST_CURRENT_LOCATION;
  1770. return fail(cs.begin(), error::syntax, &loc);
  1771. }
  1772. }
  1773. else if(BOOST_JSON_UNLIKELY(*cs != '}'))
  1774. {
  1775. if(allow_comments && *cs == '/')
  1776. {
  // obj11: comment after a value, then rescan from obj8
  1777. do_obj11:
  1778. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1779. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1780. return suspend_or_fail(state::obj11, size);
  1781. goto do_obj8;
  1782. }
  1783. BOOST_STATIC_CONSTEXPR source_location loc
  1784. = BOOST_CURRENT_LOCATION;
  1785. return fail(cs.begin(), error::syntax, &loc);
  1786. }
  1787. // got closing brace, fall through
  1788. }
  // cs still points at '}' here; report the object, give the depth level
  // back and step over the brace
  1789. if(BOOST_JSON_UNLIKELY(
  1790. ! h_.on_object_end(size, ec_)))
  1791. return fail(cs.begin());
  1792. ++depth_;
  1793. ++cs;
  1794. return cs.begin();
  1795. }
  1796. //----------------------------------------------------------
  // Parses one JSON array and reports it to the handler h_.
  //
  // On a fresh (non-resumed) call *p must be '[' (asserted below). The
  // function takes one level from depth_ (failing with error::too_deep when
  // none is left), calls on_array_begin, parses the elements with
  // parse_value, and finally calls on_array_end with the element count.
  //
  // The function is resumable: when the input is exhausted, the current
  // state (arr1..arr6) and the running element count are handed to
  // maybe_suspend / suspend_or_fail; a later call that finds a non-empty
  // state stack pops them again and jumps to the matching do_arrN label.
  //
  // allow_comments enables '/' comments between tokens, allow_trailing
  // accepts a ']' directly after a ','. allow_bad_utf8 and allow_bad_utf16
  // are only forwarded to parse_value.
  //
  // Returns the position just past the closing ']' on success; otherwise the
  // result of fail / maybe_suspend / suspend_or_fail is returned unchanged.
  1797. template<class Handler>
  1798. template<
  1799. bool StackEmpty_,
  1800. bool AllowComments_/*,
  1801. bool AllowTrailing_,
  1802. bool AllowBadUTF8_*/>
  1803. const char*
  1804. basic_parser<Handler>::
  1805. parse_array(const char* p,
  1806. std::integral_constant<bool, StackEmpty_> stack_empty,
  1807. std::integral_constant<bool, AllowComments_> allow_comments,
  1808. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  1809. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  1810. bool allow_bad_utf16)
  1811. {
  1812. detail::const_stream_wrapper cs(p, end_);
  // number of elements counted so far; restored from the stack on resume
  1813. std::size_t size;
  1814. if(! stack_empty && ! st_.empty())
  1815. {
  1816. // resume
  // the state is popped first, then the saved element count
  1817. state st;
  1818. st_.pop(st);
  1819. st_.pop(size);
  1820. switch(st)
  1821. {
  1822. default: BOOST_JSON_UNREACHABLE();
  1823. case state::arr1: goto do_arr1;
  1824. case state::arr2: goto do_arr2;
  1825. case state::arr3: goto do_arr3;
  1826. case state::arr4: goto do_arr4;
  1827. case state::arr5: goto do_arr5;
  1828. case state::arr6: goto do_arr6;
  1829. }
  1830. }
  // fresh parse: the caller positioned us on the opening bracket
  1831. BOOST_ASSERT(*cs == '[');
  1832. size = 0;
  // depth_ holds the remaining nesting budget; zero means no level is left
  1833. if(BOOST_JSON_UNLIKELY(! depth_))
  1834. {
  1835. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1836. return fail(cs.begin(), error::too_deep, &loc);
  1837. }
  1838. --depth_;
  1839. if(BOOST_JSON_UNLIKELY(
  1840. ! h_.on_array_begin(ec_)))
  1841. return fail(cs.begin());
  // step over '['
  1842. ++cs;
  1843. // array:
  1844. // '[' *ws ']'
  1845. // '[' *ws value *ws *[ ',' *ws value *ws ] ']'
  // arr1: whitespace after '[', then either ']' or the first element
  1846. do_arr1:
  1847. cs = detail::count_whitespace(cs.begin(), cs.end());
  1848. if(BOOST_JSON_UNLIKELY(! cs))
  1849. return maybe_suspend(cs.begin(), state::arr1, size);
  1850. if(BOOST_JSON_LIKELY(*cs != ']'))
  1851. {
  // one iteration per element; a comment found here is consumed before the
  // element is counted
  1852. loop:
  1853. if(allow_comments && *cs == '/')
  1854. {
  // arr2: comment before an element, then rescan from arr1
  1855. do_arr2:
  1856. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1857. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1858. return suspend_or_fail(state::arr2, size);
  1859. goto do_arr1;
  1860. }
  // the count is bumped and checked against Handler::max_array_size before
  // the element is parsed
  1861. if(BOOST_JSON_UNLIKELY(++size >
  1862. Handler::max_array_size))
  1863. {
  1864. BOOST_STATIC_CONSTEXPR source_location loc
  1865. = BOOST_CURRENT_LOCATION;
  1866. return fail(cs.begin(), error::array_too_large, &loc);
  1867. }
  // arr3: the element value
  1868. do_arr3:
  1869. // array is not empty, value required
  1870. cs = parse_value(cs.begin(), stack_empty, allow_comments, allow_trailing, allow_bad_utf8, allow_bad_utf16);
  1871. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1872. return suspend_or_fail(state::arr3, size);
  // arr4: whitespace after the element, then ',' or ']'
  1873. do_arr4:
  1874. cs = detail::count_whitespace(cs.begin(), cs.end());
  1875. if(BOOST_JSON_UNLIKELY(! cs))
  1876. return maybe_suspend(cs.begin(), state::arr4, size);
  1877. if(BOOST_JSON_LIKELY(*cs == ','))
  1878. {
  1879. ++cs;
  // arr5: whitespace after ','
  1880. do_arr5:
  1881. cs = detail::count_whitespace(cs.begin(), cs.end());
  1882. if(BOOST_JSON_UNLIKELY(! cs))
  1883. return maybe_suspend(cs.begin(), state::arr5, size);
  1884. // loop for next element
  // only an allowed trailing ']' ends the array here; everything else is
  // handed to the element loop
  1885. if(! allow_trailing || *cs != ']')
  1886. goto loop;
  1887. }
  1888. else if(BOOST_JSON_UNLIKELY(*cs != ']'))
  1889. {
  1890. if(allow_comments && *cs == '/')
  1891. {
  // arr6: comment after an element, then rescan from arr4
  1892. do_arr6:
  1893. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1894. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1895. return suspend_or_fail(state::arr6, size);
  1896. goto do_arr4;
  1897. }
  1898. BOOST_STATIC_CONSTEXPR source_location loc
  1899. = BOOST_CURRENT_LOCATION;
  1900. return fail(cs.begin(), error::syntax, &loc);
  1901. }
  1902. // got closing bracket; fall through
  1903. }
  // cs still points at ']' here; report the array, give the depth level
  // back and step over the bracket
  1904. if(BOOST_JSON_UNLIKELY(
  1905. ! h_.on_array_end(size, ec_)))
  1906. return fail(cs.begin());
  1907. ++depth_;
  1908. ++cs;
  1909. return cs.begin();
  1910. }
  1911. //----------------------------------------------------------
  // Parses a JSON number and reports it through on_int64, on_uint64 or
  // on_double; input consumed before a suspension is reported through
  // on_number_part.
  //
  // `first` encodes what is known about the first character: '-' for a
  // leading minus, '0' for a leading zero, '+' for a leading non-zero digit
  // (see the three flags below).
  // `mode` selects the number_precision:
  //  - precise: the characters seen before a suspension are appended to
  //    num_buf_, and doubles are converted by detail::charconv::from_chars;
  //  - none: the digit accumulation guarded by !no_parsing is skipped and
  //    doubles are reported as 0;
  //  - otherwise doubles are built by detail::dec_to_float from the
  //    mantissa, bias and exponent collected in `num`.
  //
  // The function is resumable: on a fresh call `num` is initialised locally,
  // otherwise it is reloaded from num_ and the popped state selects the
  // do_numN / do_expN label to continue at.
  //
  // Returns the position just past the number on success; otherwise the
  // result of fail / suspend / maybe_suspend / parse_literal is returned
  // unchanged.
  1912. template<class Handler>
  1913. template<bool StackEmpty_, char First_, number_precision Numbers_>
  1914. const char*
  1915. basic_parser<Handler>::
  1916. parse_number(const char* p,
  1917. std::integral_constant<bool, StackEmpty_> stack_empty,
  1918. std::integral_constant<char, First_> first,
  1919. std::integral_constant<number_precision, Numbers_> mode)
  1920. {
  1921. constexpr bool precise_parsing = mode == number_precision::precise;
  1922. constexpr bool no_parsing = mode == number_precision::none;
  1923. // only one of these will be true if we are not resuming
  1924. // if negative then !zero_first && !nonzero_first
  1925. // if zero_first then !nonzero_first && !negative
  1926. // if nonzero_first then !zero_first && !negative
  1927. bool const negative = first == '-';
  1928. bool const zero_first = first == '0';
  1929. bool const nonzero_first = first == '+';
  1930. detail::const_stream_wrapper cs(p, end_);
  1931. number num;
  // start of the characters belonging to this call; used for the
  // {begin, cs.used(begin)} views passed to the handler
  1932. const char* begin = cs.begin();
  1933. if(stack_empty || st_.empty())
  1934. {
  // fresh parse: start from an empty number and an empty buffer
  1935. num.bias = 0;
  1936. num.exp = 0;
  1937. num.frac = false;
  1938. num_buf_.clear();
  1939. //----------------------------------
  1940. //
  1941. // '-'
  1942. // leading minus sign
  1943. //
  1944. BOOST_ASSERT(cs);
  1945. if(negative)
  1946. ++cs;
  1947. num.neg = negative;
  1948. num.frac = false;
  1949. num.exp = 0;
  1950. num.bias = 0;
  1951. // fast path
  // taken only when at least 16 + 1 + 16 characters remain;
  // detail::count_digits is asserted to return a value in 0..16
  1952. if( cs.remain() >= 16 + 1 + 16 ) // digits . digits
  1953. {
  // n1: number of integer digits consumed by the fast path
  1954. int n1;
  1955. if( nonzero_first ||
  1956. (negative && *cs != '0') )
  1957. {
  1958. n1 = detail::count_digits( cs.begin() );
  1959. BOOST_ASSERT(n1 >= 0 && n1 <= 16);
  // '-' followed by a non-digit: try the negative infinity literal
  // NOTE(review): cs was built from p and has been advanced past '-', yet
  // p - 1 is passed here; confirm this matches what parse_literal expects
  1960. if( negative && n1 == 0 && opt_.allow_infinity_and_nan )
  1961. {
  1962. return parse_literal(
  1963. p - 1, mp11::mp_int<detail::neg_infinity_literal>());
  1964. }
  1965. if( ! nonzero_first && n1 == 0 )
  1966. {
  1967. // digit required
  1968. BOOST_STATIC_CONSTEXPR source_location loc
  1969. = BOOST_CURRENT_LOCATION;
  1970. return fail(cs.begin(), error::syntax, &loc);
  1971. }
  1972. BOOST_IF_CONSTEXPR( !no_parsing )
  1973. num.mant = detail::parse_unsigned( 0, cs.begin(), n1 );
  1974. else
  1975. num.mant = 0;
  1976. cs += n1;
  1977. // integer or floating-point with
  1978. // >= 16 leading digits
  1979. if( n1 == 16 )
  1980. {
  1981. goto do_num2;
  1982. }
  1983. }
  1984. else
  1985. {
  1986. // 0. floating-point or 0e integer
  1987. num.mant = 0;
  1988. n1 = 0;
  1989. ++cs;
  1990. }
  1991. {
  // character after the integer part: '.', an exponent marker, or the end
  // of an integer
  1992. const char c = *cs;
  1993. if(c != '.')
  1994. {
  // (c | 32) folds 'E' onto 'e'
  1995. if((c | 32) == 'e')
  1996. {
  1997. ++cs;
  1998. goto do_exp1;
  1999. }
  // negate the mantissa (two's complement) before reporting it as int64
  2000. BOOST_IF_CONSTEXPR( negative && !no_parsing )
  2001. num.mant = ~num.mant + 1;
  2002. goto finish_signed;
  2003. }
  2004. }
  2005. // floating-point number
  2006. ++cs;
  // n2: number of fraction digits found right after the '.'
  2007. int n2 = detail::count_digits( cs.begin() );
  2008. BOOST_ASSERT(n2 >= 0 && n2 <= 16);
  2009. if( n2 == 0 )
  2010. {
  2011. // digit required
  2012. BOOST_STATIC_CONSTEXPR source_location loc
  2013. = BOOST_CURRENT_LOCATION;
  2014. return fail(cs.begin(), error::syntax, &loc);
  2015. }
  2016. // floating-point mantissa overflow
  // the fraction digits are not consumed here; do_num7/do_num8 handle them
  2017. if( n1 + n2 >= 19 )
  2018. {
  2019. goto do_num7;
  2020. }
  2021. BOOST_IF_CONSTEXPR( !no_parsing )
  2022. num.mant = detail::parse_unsigned( num.mant, cs.begin(), n2 );
  2023. BOOST_ASSERT(num.bias == 0);
  // each fraction digit folded into the mantissa lowers the decimal bias
  2024. num.bias -= n2;
  2025. cs += n2;
  2026. char ch = *cs;
  2027. if( (ch | 32) == 'e' )
  2028. {
  2029. ++cs;
  2030. goto do_exp1;
  2031. }
  2032. else if( ch >= '0' && ch <= '9' )
  2033. {
  2034. goto do_num8;
  2035. }
  2036. goto finish_dub;
  2037. }
  2038. }
  2039. else
  2040. {
  // resume: reload the partially parsed number and jump to the saved state
  2041. num = num_;
  2042. state st;
  2043. st_.pop(st);
  2044. switch(st)
  2045. {
  2046. default: BOOST_JSON_UNREACHABLE();
  2047. case state::num1: goto do_num1;
  2048. case state::num2: goto do_num2;
  2049. case state::num3: goto do_num3;
  2050. case state::num4: goto do_num4;
  2051. case state::num5: goto do_num5;
  2052. case state::num6: goto do_num6;
  2053. case state::num7: goto do_num7;
  2054. case state::num8: goto do_num8;
  2055. case state::exp1: goto do_exp1;
  2056. case state::exp2: goto do_exp2;
  2057. case state::exp3: goto do_exp3;
  2058. }
  2059. }
  2060. //----------------------------------
  2061. //
  2062. // DIGIT
  2063. // first digit
  2064. //
  2065. do_num1:
  2066. if(zero_first || nonzero_first ||
  2067. BOOST_JSON_LIKELY(cs))
  2068. {
  2069. char const c = *cs;
  2070. if(zero_first)
  2071. {
  // a leading zero may only be followed by '.', an exponent or the end
  2072. ++cs;
  2073. num.mant = 0;
  2074. goto do_num6;
  2075. }
  2076. else if(nonzero_first || BOOST_JSON_LIKELY(
  2077. c >= '1' && c <= '9'))
  2078. {
  2079. ++cs;
  2080. num.mant = c - '0';
  2081. }
  2082. else if(BOOST_JSON_UNLIKELY(
  2083. c == '0'))
  2084. {
  2085. ++cs;
  2086. num.mant = 0;
  2087. goto do_num6;
  2088. }
  2089. else if( (negative || num.neg) && opt_.allow_infinity_and_nan )
  2090. {
  // '-' followed by a non-digit: continue as the negative infinity
  // literal, resuming at offset 1 of that literal
  2091. st_.push(state::lit1);
  2092. cur_lit_ = detail::neg_infinity_literal;
  2093. lit_offset_ = 1;
  2094. return parse_literal(
  2095. cs.begin(), mp11::mp_int<detail::resume_literal>() );
  2096. }
  2097. else
  2098. {
  2099. BOOST_STATIC_CONSTEXPR source_location loc
  2100. = BOOST_CURRENT_LOCATION;
  2101. return fail(cs.begin(), error::syntax, &loc);
  2102. }
  2103. }
  2104. else
  2105. {
  // input ended before the first digit: report what was consumed and let
  // maybe_suspend decide how to proceed
  2106. if(BOOST_JSON_UNLIKELY(
  2107. ! h_.on_number_part(
  2108. {begin, cs.used(begin)}, ec_)))
  2109. return fail(cs.begin());
  2110. BOOST_IF_CONSTEXPR( precise_parsing )
  2111. num_buf_.append( begin, cs.used(begin) );
  2112. return maybe_suspend(
  2113. cs.begin(), state::num1, num);
  2114. }
  2115. //----------------------------------
  2116. //
  2117. // 1*DIGIT
  2118. // significant digits left of decimal
  2119. //
  // Two copies of the same loop that differ only in the overflow limit:
  // the magnitude of INT64_MIN for negative numbers, UINT64_MAX otherwise.
  // `break` leaves the loop when appending the digit would exceed the
  // limit; that digit is already consumed, and the ++num.bias after the
  // loops accounts for it.
  2120. do_num2:
  2121. if(negative || (!stack_empty && num.neg))
  2122. {
  2123. for(;;)
  2124. {
  2125. if(BOOST_JSON_UNLIKELY(! cs))
  2126. {
  2127. if(BOOST_JSON_UNLIKELY(more_))
  2128. {
  2129. if(BOOST_JSON_UNLIKELY(
  2130. ! h_.on_number_part(
  2131. {begin, cs.used(begin)}, ec_)))
  2132. return fail(cs.begin());
  2133. BOOST_IF_CONSTEXPR( precise_parsing )
  2134. num_buf_.append( begin, cs.used(begin) );
  2135. return suspend(cs.begin(), state::num2, num);
  2136. }
  2137. goto finish_int;
  2138. }
  2139. char const c = *cs;
  2140. if(BOOST_JSON_LIKELY(
  2141. c >= '0' && c <= '9'))
  2142. {
  2143. ++cs;
  2144. // 9223372036854775808 INT64_MIN
  2145. if( num.mant > 922337203685477580 || (
  2146. num.mant == 922337203685477580 && c > '8'))
  2147. break;
  2148. BOOST_IF_CONSTEXPR( !no_parsing )
  2149. num.mant = 10 * num.mant + ( c - '0' );
  2150. continue;
  2151. }
  2152. goto do_num6; // [.eE]
  2153. }
  2154. }
  2155. else
  2156. {
  2157. for(;;)
  2158. {
  2159. if(BOOST_JSON_UNLIKELY(! cs))
  2160. {
  2161. if(BOOST_JSON_UNLIKELY(more_))
  2162. {
  2163. if(BOOST_JSON_UNLIKELY(
  2164. ! h_.on_number_part(
  2165. {begin, cs.used(begin)}, ec_)))
  2166. return fail(cs.begin());
  2167. BOOST_IF_CONSTEXPR( precise_parsing )
  2168. num_buf_.append( begin, cs.used(begin) );
  2169. return suspend(cs.begin(), state::num2, num);
  2170. }
  2171. goto finish_int;
  2172. }
  2173. char const c = *cs;
  2174. if(BOOST_JSON_LIKELY(
  2175. c >= '0' && c <= '9'))
  2176. {
  2177. ++cs;
  2178. // 18446744073709551615 UINT64_MAX
  2179. if( num.mant > 1844674407370955161 || (
  2180. num.mant == 1844674407370955161 && c > '5'))
  2181. break;
  2182. BOOST_IF_CONSTEXPR( !no_parsing )
  2183. num.mant = 10 * num.mant + ( c - '0' );
  2184. }
  2185. else
  2186. {
  2187. goto do_num6; // [.eE]
  2188. }
  2189. }
  2190. }
  // reached only via `break`: the dropped digit still scales the value by 10
  2191. ++num.bias;
  2192. //----------------------------------
  2193. //
  2194. // 1*DIGIT
  2195. // non-significant digits left of decimal
  2196. //
  // every further integer digit is dropped from the mantissa and counted in
  // the bias instead
  2197. do_num3:
  2198. for(;;)
  2199. {
  2200. if(BOOST_JSON_UNLIKELY(! cs))
  2201. {
  2202. if(BOOST_JSON_UNLIKELY(more_))
  2203. {
  2204. if(BOOST_JSON_UNLIKELY(
  2205. ! h_.on_number_part(
  2206. {begin, cs.used(begin)}, ec_)))
  2207. return fail(cs.begin());
  2208. BOOST_IF_CONSTEXPR( precise_parsing )
  2209. num_buf_.append( begin, cs.used(begin) );
  2210. return suspend(cs.begin(), state::num3, num);
  2211. }
  2212. goto finish_dub;
  2213. }
  2214. char const c = *cs;
  2215. if(BOOST_JSON_UNLIKELY(
  2216. c >= '0' && c <= '9'))
  2217. {
  // refuse the digit when the increment below would bring bias to INT_MAX
  2218. if(BOOST_JSON_UNLIKELY( num.bias + 1 == INT_MAX ))
  2219. {
  2220. BOOST_STATIC_CONSTEXPR source_location loc
  2221. = BOOST_CURRENT_LOCATION;
  2222. return fail(cs.begin(), error::exponent_overflow, &loc);
  2223. }
  2224. ++cs;
  2225. ++num.bias;
  2226. }
  2227. else if(BOOST_JSON_LIKELY(
  2228. c == '.'))
  2229. {
  2230. ++cs;
  2231. break;
  2232. }
  2233. else if((c | 32) == 'e')
  2234. {
  2235. ++cs;
  2236. goto do_exp1;
  2237. }
  2238. else
  2239. {
  2240. goto finish_dub;
  2241. }
  2242. }
  2243. //----------------------------------
  2244. //
  2245. // DIGIT
  2246. // first non-significant digit
  2247. // to the right of decimal
  2248. //
  // exactly one digit is required after the '.'; it is consumed without
  // touching the mantissa or the bias
  2249. do_num4:
  2250. {
  2251. if(BOOST_JSON_UNLIKELY(! cs))
  2252. {
  2253. if(BOOST_JSON_UNLIKELY(
  2254. ! h_.on_number_part(
  2255. {begin, cs.used(begin)}, ec_)))
  2256. return fail(cs.begin());
  2257. BOOST_IF_CONSTEXPR( precise_parsing )
  2258. num_buf_.append( begin, cs.used(begin) );
  2259. return maybe_suspend(
  2260. cs.begin(), state::num4, num);
  2261. }
  2262. char const c = *cs;
  2263. if(BOOST_JSON_LIKELY(
  2264. //static_cast<unsigned char>(c - '0') < 10))
  2265. c >= '0' && c <= '9'))
  2266. {
  2267. ++cs;
  2268. }
  2269. else
  2270. {
  2271. // digit required
  2272. BOOST_STATIC_CONSTEXPR source_location loc
  2273. = BOOST_CURRENT_LOCATION;
  2274. return fail(cs.begin(), error::syntax, &loc);
  2275. }
  2276. }
  2277. //----------------------------------
  2278. //
  2279. // 1*DIGIT
  2280. // non-significant digits
  2281. // to the right of decimal
  2282. //
  // remaining fraction digits are skipped until an exponent marker or the
  // end of the number
  2283. do_num5:
  2284. for(;;)
  2285. {
  2286. if(BOOST_JSON_UNLIKELY(! cs))
  2287. {
  2288. if(BOOST_JSON_UNLIKELY(more_))
  2289. {
  2290. if(BOOST_JSON_UNLIKELY(
  2291. ! h_.on_number_part(
  2292. {begin, cs.used(begin)}, ec_)))
  2293. return fail(cs.begin());
  2294. BOOST_IF_CONSTEXPR( precise_parsing )
  2295. num_buf_.append( begin, cs.used(begin) );
  2296. return suspend(cs.begin(), state::num5, num);
  2297. }
  2298. goto finish_dub;
  2299. }
  2300. char const c = *cs;
  2301. if(BOOST_JSON_LIKELY(
  2302. c >= '0' && c <= '9'))
  2303. {
  2304. ++cs;
  2305. }
  2306. else if((c | 32) == 'e')
  2307. {
  2308. ++cs;
  2309. goto do_exp1;
  2310. }
  2311. else
  2312. {
  2313. goto finish_dub;
  2314. }
  2315. }
  2316. //----------------------------------
  2317. //
  2318. // [.eE]
  2319. //
  // after an integer part that fit into the mantissa: '.' continues with
  // the fraction, an exponent marker jumps to do_exp1, anything else (or
  // the end of the final buffer) finishes the number as an integer
  2320. do_num6:
  2321. {
  2322. if(BOOST_JSON_UNLIKELY(! cs))
  2323. {
  2324. if(BOOST_JSON_UNLIKELY(more_))
  2325. {
  2326. if(BOOST_JSON_UNLIKELY(
  2327. ! h_.on_number_part(
  2328. {begin, cs.used(begin)}, ec_)))
  2329. return fail(cs.begin());
  2330. BOOST_IF_CONSTEXPR( precise_parsing )
  2331. num_buf_.append( begin, cs.used(begin) );
  2332. return suspend(cs.begin(), state::num6, num);
  2333. }
  2334. goto finish_int;
  2335. }
  2336. char const c = *cs;
  2337. if(BOOST_JSON_LIKELY(
  2338. c == '.'))
  2339. {
  2340. ++cs;
  2341. }
  2342. else if((c | 32) == 'e')
  2343. {
  2344. ++cs;
  2345. goto do_exp1;
  2346. }
  2347. else
  2348. {
  2349. goto finish_int;
  2350. }
  2351. }
  2352. //----------------------------------
  2353. //
  2354. // DIGIT
  2355. // first significant digit
  2356. // to the right of decimal
  2357. //
  // only checks that a digit follows; the digit itself is consumed by the
  // do_num8 loop
  2358. do_num7:
  2359. {
  2360. if(BOOST_JSON_UNLIKELY(! cs))
  2361. {
  2362. if(BOOST_JSON_UNLIKELY(more_))
  2363. {
  2364. if(BOOST_JSON_UNLIKELY(
  2365. ! h_.on_number_part(
  2366. {begin, cs.used(begin)}, ec_)))
  2367. return fail(cs.begin());
  2368. BOOST_IF_CONSTEXPR( precise_parsing )
  2369. num_buf_.append( begin, cs.used(begin) );
  2370. return suspend(cs.begin(), state::num7, num);
  2371. }
  2372. // digit required
  2373. BOOST_STATIC_CONSTEXPR source_location loc
  2374. = BOOST_CURRENT_LOCATION;
  2375. return fail(cs.begin(), error::syntax, &loc);
  2376. }
  2377. char const c = *cs;
  2378. if(BOOST_JSON_UNLIKELY(
  2379. c < '0' || c > '9'))
  2380. {
  2381. // digit required
  2382. BOOST_STATIC_CONSTEXPR source_location loc
  2383. = BOOST_CURRENT_LOCATION;
  2384. return fail(cs.begin(), error::syntax, &loc);
  2385. }
  2386. }
  2387. //----------------------------------
  2388. //
  2389. // 1*DIGIT
  2390. // significant digits
  2391. // to the right of decimal
  2392. //
  // fraction digits are folded into the mantissa (lowering the bias) while
  // the mantissa is at most 2^53-1; after that the current digit is dropped
  // and the rest is skipped by do_num5
  2393. do_num8:
  2394. for(;;)
  2395. {
  2396. if(BOOST_JSON_UNLIKELY(! cs))
  2397. {
  2398. if(BOOST_JSON_UNLIKELY(more_))
  2399. {
  2400. if(BOOST_JSON_UNLIKELY(
  2401. ! h_.on_number_part(
  2402. {begin, cs.used(begin)}, ec_)))
  2403. return fail(cs.begin());
  2404. BOOST_IF_CONSTEXPR( precise_parsing )
  2405. num_buf_.append( begin, cs.used(begin) );
  2406. return suspend(cs.begin(), state::num8, num);
  2407. }
  2408. goto finish_dub;
  2409. }
  2410. char const c = *cs;
  2411. if(BOOST_JSON_LIKELY(
  2412. c >= '0' && c <= '9'))
  2413. {
  2414. ++cs;
  2415. if(!no_parsing && BOOST_JSON_LIKELY(
  2416. num.mant <= 9007199254740991)) // 2^53-1
  2417. {
  // refuse the digit when the decrement below would bring bias to INT_MIN
  2418. if(BOOST_JSON_UNLIKELY( num.bias - 1 == INT_MIN ))
  2419. {
  2420. BOOST_STATIC_CONSTEXPR source_location loc
  2421. = BOOST_CURRENT_LOCATION;
  2422. return fail(cs.begin(), error::exponent_overflow, &loc);
  2423. }
  2424. --num.bias;
  2425. num.mant = 10 * num.mant + ( c - '0' );
  2426. }
  2427. else
  2428. {
  2429. goto do_num5;
  2430. }
  2431. }
  2432. else if((c | 32) == 'e')
  2433. {
  2434. ++cs;
  2435. goto do_exp1;
  2436. }
  2437. else
  2438. {
  2439. goto finish_dub;
  2440. }
  2441. }
  2442. //----------------------------------
  2443. //
  2444. // *[+-]
  2445. //
  // optional exponent sign; num.frac records a '-' and later selects
  // -num.exp when the double is built
  2446. do_exp1:
  2447. if(BOOST_JSON_UNLIKELY(! cs))
  2448. {
  2449. if(BOOST_JSON_UNLIKELY(
  2450. ! h_.on_number_part(
  2451. {begin, cs.used(begin)}, ec_)))
  2452. return fail(cs.begin());
  2453. BOOST_IF_CONSTEXPR( precise_parsing )
  2454. num_buf_.append( begin, cs.used(begin) );
  2455. return maybe_suspend(
  2456. cs.begin(), state::exp1, num);
  2457. }
  2458. if(*cs == '+')
  2459. {
  2460. ++cs;
  2461. }
  2462. else if(*cs == '-')
  2463. {
  2464. ++cs;
  2465. num.frac = true;
  2466. }
  2467. //----------------------------------
  2468. //
  2469. // DIGIT
  2470. // first digit of the exponent
  2471. //
  2472. do_exp2:
  2473. {
  2474. if(BOOST_JSON_UNLIKELY(! cs))
  2475. {
  2476. if(BOOST_JSON_UNLIKELY(more_))
  2477. {
  2478. if(BOOST_JSON_UNLIKELY(
  2479. ! h_.on_number_part(
  2480. {begin, cs.used(begin)}, ec_)))
  2481. return fail(cs.begin());
  2482. BOOST_IF_CONSTEXPR( precise_parsing )
  2483. num_buf_.append( begin, cs.used(begin) );
  2484. return suspend(cs.begin(), state::exp2, num);
  2485. }
  2486. // digit required
  2487. BOOST_STATIC_CONSTEXPR source_location loc
  2488. = BOOST_CURRENT_LOCATION;
  2489. return fail(cs.begin(), error::syntax, &loc);
  2490. }
  2491. char const c = *cs;
  2492. if(BOOST_JSON_UNLIKELY(
  2493. c < '0' || c > '9'))
  2494. {
  2495. // digit required
  2496. BOOST_STATIC_CONSTEXPR source_location loc
  2497. = BOOST_CURRENT_LOCATION;
  2498. return fail(cs.begin(), error::syntax, &loc);
  2499. }
  2500. ++cs;
  2501. num.exp = c - '0';
  2502. }
  2503. //----------------------------------
  2504. //
  2505. // 1*DIGIT
  2506. // subsequent digits in the exponent
  2507. //
  // exponent digits are accumulated and saturate at INT_MAX. When a
  // non-digit is seen, or the final buffer ends, control falls through to
  // the range checks below and then to finish_dub.
  2508. do_exp3:
  2509. for(;;)
  2510. {
  2511. if(BOOST_JSON_UNLIKELY(! cs))
  2512. {
  2513. if(BOOST_JSON_UNLIKELY(more_))
  2514. {
  2515. if(BOOST_JSON_UNLIKELY(
  2516. ! h_.on_number_part(
  2517. {begin, cs.used(begin)}, ec_)))
  2518. return fail(cs.begin());
  2519. BOOST_IF_CONSTEXPR( precise_parsing )
  2520. num_buf_.append( begin, cs.used(begin) );
  2521. return suspend(cs.begin(), state::exp3, num);
  2522. }
  2523. }
  2524. else
  2525. {
  2526. char const c = *cs;
  2527. if(BOOST_JSON_LIKELY( c >= '0' && c <= '9' ))
  2528. {
  2529. if(BOOST_JSON_UNLIKELY(
  2530. // 2147483647 INT_MAX
  2531. num.exp > 214748364 ||
  2532. (num.exp == 214748364 && c > '7')
  2533. ))
  2534. num.exp = INT_MAX;
  2535. else BOOST_IF_CONSTEXPR( !no_parsing )
  2536. num.exp = 10 * num.exp + ( c - '0' );
  2537. ++cs;
  2538. continue;
  2539. }
  2540. }
  2541. BOOST_ASSERT(num.exp >= 0);
  // guard the later `bias -/+ exp` combination against int overflow:
  // num.frac means the exponent is negative
  2542. if ( num.frac )
  2543. {
  2544. if(BOOST_JSON_UNLIKELY( num.bias < (INT_MIN + num.exp) ))
  2545. {
  2546. // if exponent overflowed, bias is a very large negative
  2547. // number, and mantissa isn't zero, then we cannot parse the
  2548. // number correctly
  2549. if(BOOST_JSON_UNLIKELY(
  2550. (num.exp == INT_MAX) &&
  2551. (num.bias < 0) &&
  2552. (num.exp + num.bias < 308) &&
  2553. num.mant ))
  2554. {
  2555. BOOST_STATIC_CONSTEXPR source_location loc
  2556. = BOOST_CURRENT_LOCATION;
  2557. return fail(cs.begin(), error::exponent_overflow, &loc);
  2558. }
  2559. num.bias = 0;
  2560. num.exp = INT_MAX;
  2561. }
  2562. }
  2563. else if (BOOST_JSON_UNLIKELY( num.bias > (INT_MAX - num.exp) ))
  2564. {
  2565. // if exponent overflowed, bias is a very large positive number,
  2566. // and mantissa isn't zero, then we cannot parse the
  2567. // number correctly
  2568. if(BOOST_JSON_UNLIKELY(
  2569. (num.exp == INT_MAX) &&
  2570. (num.bias > 0) &&
  2571. (num.exp - num.bias < 308) &&
  2572. num.mant ))
  2573. {
  2574. BOOST_STATIC_CONSTEXPR source_location loc
  2575. = BOOST_CURRENT_LOCATION;
  2576. return fail(cs.begin(), error::exponent_overflow, &loc);
  2577. }
  2578. num.bias = 0;
  2579. num.exp = INT_MAX;
  2580. }
  2581. goto finish_dub;
  2582. }
  // integer result: negative numbers are negated (two's complement) and
  // reported as int64; non-negative ones are reported as int64 when they
  // fit and as uint64 otherwise
  2583. finish_int:
  2584. if(negative || (!stack_empty && num.neg))
  2585. {
  2586. if(BOOST_JSON_UNLIKELY(
  2587. ! h_.on_int64(static_cast<
  2588. int64_t>(~num.mant + 1), {begin, cs.used(begin)}, ec_)))
  2589. return fail(cs.begin());
  2590. return cs.begin();
  2591. }
  2592. if(num.mant <= INT64_MAX)
  2593. {
  // also entered directly from the fast path, where the mantissa has
  // already been negated if needed
  2594. finish_signed:
  2595. if(BOOST_JSON_UNLIKELY(
  2596. ! h_.on_int64(static_cast<
  2597. int64_t>(num.mant), {begin, cs.used(begin)}, ec_)))
  2598. return fail(cs.begin());
  2599. return cs.begin();
  2600. }
  2601. if(BOOST_JSON_UNLIKELY(
  2602. ! h_.on_uint64(num.mant, {begin, cs.used(begin)}, ec_)))
  2603. return fail(cs.begin());
  2604. return cs.begin();
  // floating-point result: the conversion used depends on `mode`
  2605. finish_dub:
  2606. double d;
  // number of characters of this call's input that belong to the number
  2607. std::size_t const size = cs.used(begin);
  2608. BOOST_ASSERT( !num_buf_.size() || precise_parsing );
  2609. BOOST_IF_CONSTEXPR( precise_parsing )
  2610. {
  2611. char const* data = begin;
  2612. std::size_t full_size = size;
  2613. // if we previously suspended or if the current input ends with the
  2614. // number, we need to copy the current part of the number to the
  2615. // temporary buffer
  2616. if(BOOST_JSON_UNLIKELY( num_buf_.size() ))
  2617. {
  2618. data = num_buf_.append( begin, size );
  2619. full_size = num_buf_.size();
  2620. }
  2621. auto const err = detail::charconv::from_chars(
  2622. data, data + full_size, d );
  2623. BOOST_ASSERT( err.ec != std::errc::invalid_argument );
  2624. BOOST_ASSERT( err.ptr == data + full_size );
  2625. (void)err;
  2626. }
  2627. else BOOST_IF_CONSTEXPR( no_parsing )
  2628. d = 0;
  2629. else
  2630. d = detail::dec_to_float(
  2631. num.mant,
  2632. num.bias + (num.frac ?
  2633. -num.exp : num.exp),
  2634. num.neg);
  // the handler receives only the characters of this call, not the buffer
  2635. if(BOOST_JSON_UNLIKELY(
  2636. ! h_.on_double(d, {begin, size}, ec_)))
  2637. return fail(cs.begin());
  2638. return cs.begin();
  2639. }
  2640. //----------------------------------------------------------
  2641. template<class Handler>
  2642. template<class... Args>
  2643. basic_parser<Handler>::
  2644. basic_parser(
  2645. parse_options const& opt,
  2646. Args&&... args)
  2647. : h_(std::forward<Args>(args)...)
  2648. , opt_(opt)
  2649. {
  2650. }
  2651. //----------------------------------------------------------
  2652. template<class Handler>
  2653. void
  2654. basic_parser<Handler>::
  2655. reset() noexcept
  2656. {
  2657. ec_ = {};
  2658. st_.clear();
  2659. more_ = true;
  2660. done_ = false;
  2661. clean_ = true;
  2662. num_buf_.clear();
  2663. }
  2664. template<class Handler>
  2665. void
  2666. basic_parser<Handler>::
  2667. fail(system::error_code ec) noexcept
  2668. {
  2669. if(! ec)
  2670. {
  2671. // assign an arbitrary
  2672. // error code to prevent UB
  2673. BOOST_JSON_FAIL(ec_, error::incomplete);
  2674. }
  2675. else
  2676. {
  2677. ec_ = ec;
  2678. }
  2679. done_ = false;
  2680. }
  2681. //----------------------------------------------------------
  2682. template<class Handler>
  2683. std::size_t
  2684. basic_parser<Handler>::
  2685. write_some(
  2686. bool more,
  2687. char const* data,
  2688. std::size_t size,
  2689. system::error_code& ec)
  2690. {
  2691. // see if we exited via exception
  2692. // on the last call to write_some
  2693. if(! clean_)
  2694. {
  2695. // prevent UB
  2696. if(! ec_)
  2697. {
  2698. BOOST_JSON_FAIL(ec_, error::exception);
  2699. }
  2700. }
  2701. if(ec_)
  2702. {
  2703. // error is sticky
  2704. ec = ec_;
  2705. return 0;
  2706. }
  2707. clean_ = false;
  2708. more_ = more;
  2709. end_ = data + size;
  2710. const char* p;
  2711. if(BOOST_JSON_LIKELY(st_.empty()))
  2712. {
  2713. // first time
  2714. depth_ = opt_.max_depth;
  2715. if(BOOST_JSON_UNLIKELY(
  2716. ! h_.on_document_begin(ec_)))
  2717. {
  2718. ec = ec_;
  2719. return 0;
  2720. }
  2721. p = parse_document(data, std::true_type());
  2722. }
  2723. else
  2724. {
  2725. p = parse_document(data, std::false_type());
  2726. }
  2727. if(BOOST_JSON_LIKELY(p != sentinel()))
  2728. {
  2729. BOOST_ASSERT(! ec_);
  2730. if(! done_)
  2731. {
  2732. done_ = true;
  2733. h_.on_document_end(ec_);
  2734. }
  2735. }
  2736. else
  2737. {
  2738. if(! ec_)
  2739. {
  2740. if(! more_)
  2741. {
  2742. BOOST_JSON_FAIL(ec_, error::incomplete);
  2743. }
  2744. else if(! st_.empty())
  2745. {
  2746. // consume as much trailing whitespace in
  2747. // the JSON document as possible, but still
  2748. // consider the parse complete
  2749. state st;
  2750. st_.peek(st);
  2751. if( st == state::doc3 &&
  2752. ! done_)
  2753. {
  2754. done_ = true;
  2755. h_.on_document_end(ec_);
  2756. }
  2757. }
  2758. }
  2759. p = end_;
  2760. }
  2761. ec = ec_;
  2762. clean_ = true;
  2763. return p - data;
  2764. }
  2765. template<class Handler>
  2766. std::size_t
  2767. basic_parser<Handler>::
  2768. write_some(
  2769. bool more,
  2770. char const* data,
  2771. std::size_t size,
  2772. std::error_code& ec)
  2773. {
  2774. system::error_code jec;
  2775. std::size_t const result = write_some(more, data, size, jec);
  2776. ec = jec;
  2777. return result;
  2778. }
  2779. #endif
  2780. } // namespace json
  2781. } // namespace boost
  2782. #ifdef _MSC_VER
  2783. #pragma warning(pop)
  2784. #endif
  2785. #endif