basic_parser.hpp 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709
  1. //
  2. // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
  3. // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_HPP
  11. #define BOOST_JSON_BASIC_PARSER_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/detail/except.hpp>
  14. #include <boost/json/error.hpp>
  15. #include <boost/json/kind.hpp>
  16. #include <boost/json/parse_options.hpp>
  17. #include <boost/json/detail/stack.hpp>
  18. #include <boost/json/detail/stream.hpp>
  19. #include <boost/json/detail/utf8.hpp>
  20. #include <boost/json/detail/sbo_buffer.hpp>
  21. namespace boost {
  22. namespace json {
  23. /** An incremental SAX parser for serialized JSON.
  24. This implements a SAX-style parser, invoking a
  25. caller-supplied handler with each parsing event.
  26. To use, first declare a variable of type
  27. `basic_parser<T>` where `T` meets the handler
  28. requirements specified below. Then call
  29. @ref write_some one or more times with the input,
  30. setting `more = false` on the final buffer.
  31. The parsing events are realized through member
  32. function calls on the handler, which exists
  33. as a data member of the parser.
  34. \n
  35. The parser may dynamically allocate intermediate
  36. storage as needed to accommodate the nesting level
  37. of the input JSON. On subsequent invocations, the
  38. parser can cheaply re-use this memory, improving
  39. performance. This storage is freed when the
  40. parser is destroyed.
  41. @par Usage
  42. To get the declaration and function definitions
  43. for this class it is necessary to include this
  44. file instead:
  45. @code
  46. #include <boost/json/basic_parser_impl.hpp>
  47. @endcode
  48. Users who wish to parse JSON into the DOM container
  49. @ref value will not use this class directly; instead
  50. they will create an instance of @ref parser or
  51. @ref stream_parser and use that instead. Alternatively,
  52. they may call the function @ref parse. This class is
  53. designed for users who wish to perform custom actions
  54. instead of building a @ref value. For example, to
  55. produce a DOM from an external library.
  56. \n
  57. @note
  58. By default, only conforming JSON using UTF-8
  59. encoding is accepted. However, select non-compliant
  60. syntax can be allowed by construction using a
  61. @ref parse_options set to desired values.
  62. @par Handler
  63. The handler provided must be implemented as an
  64. object of class type which defines each of the
  65. required event member functions below. The event
  66. functions return a `bool` where `true` indicates
  67. success, and `false` indicates failure. If the
  68. member function returns `false`, it must set
  69. the error code to a suitable value. This error
  70. code will be returned by the write function to
  71. the caller.
  72. \n
  73. Handlers are required to declare the maximum
  74. limits on various elements. If these limits
  75. are exceeded during parsing, then parsing
  76. fails with an error.
  77. \n
  78. The following declaration meets the parser's
  79. handler requirements:
  80. @code
  81. struct handler
  82. {
  83. /// The maximum number of elements allowed in an array
  84. static constexpr std::size_t max_array_size = -1;
  85. /// The maximum number of elements allowed in an object
  86. static constexpr std::size_t max_object_size = -1;
  87. /// The maximum number of characters allowed in a string
  88. static constexpr std::size_t max_string_size = -1;
  89. /// The maximum number of characters allowed in a key
  90. static constexpr std::size_t max_key_size = -1;
  91. /// Called once when the JSON parsing begins.
  92. ///
  93. /// @return `true` on success.
  94. /// @param ec Set to the error, if any occurred.
  95. ///
  96. bool on_document_begin( error_code& ec );
  97. /// Called when the JSON parsing is done.
  98. ///
  99. /// @return `true` on success.
  100. /// @param ec Set to the error, if any occurred.
  101. ///
  102. bool on_document_end( error_code& ec );
  103. /// Called when the beginning of an array is encountered.
  104. ///
  105. /// @return `true` on success.
  106. /// @param ec Set to the error, if any occurred.
  107. ///
  108. bool on_array_begin( error_code& ec );
  109. /// Called when the end of the current array is encountered.
  110. ///
  111. /// @return `true` on success.
  112. /// @param n The number of elements in the array.
  113. /// @param ec Set to the error, if any occurred.
  114. ///
  115. bool on_array_end( std::size_t n, error_code& ec );
  116. /// Called when the beginning of an object is encountered.
  117. ///
  118. /// @return `true` on success.
  119. /// @param ec Set to the error, if any occurred.
  120. ///
  121. bool on_object_begin( error_code& ec );
  122. /// Called when the end of the current object is encountered.
  123. ///
  124. /// @return `true` on success.
  125. /// @param n The number of elements in the object.
  126. /// @param ec Set to the error, if any occurred.
  127. ///
  128. bool on_object_end( std::size_t n, error_code& ec );
  129. /// Called with characters corresponding to part of the current string.
  130. ///
  131. /// @return `true` on success.
  132. /// @param s The partial characters
  133. /// @param n The total size of the string thus far
  134. /// @param ec Set to the error, if any occurred.
  135. ///
  136. bool on_string_part( string_view s, std::size_t n, error_code& ec );
  137. /// Called with the last characters corresponding to the current string.
  138. ///
  139. /// @return `true` on success.
  140. /// @param s The remaining characters
  141. /// @param n The total size of the string
  142. /// @param ec Set to the error, if any occurred.
  143. ///
  144. bool on_string( string_view s, std::size_t n, error_code& ec );
  145. /// Called with characters corresponding to part of the current key.
  146. ///
  147. /// @return `true` on success.
  148. /// @param s The partial characters
  149. /// @param n The total size of the key thus far
  150. /// @param ec Set to the error, if any occurred.
  151. ///
  152. bool on_key_part( string_view s, std::size_t n, error_code& ec );
  153. /// Called with the last characters corresponding to the current key.
  154. ///
  155. /// @return `true` on success.
  156. /// @param s The remaining characters
  157. /// @param n The total size of the key
  158. /// @param ec Set to the error, if any occurred.
  159. ///
  160. bool on_key( string_view s, std::size_t n, error_code& ec );
  161. /// Called with the characters corresponding to part of the current number.
  162. ///
  163. /// @return `true` on success.
  164. /// @param s The partial characters
  165. /// @param ec Set to the error, if any occurred.
  166. ///
  167. bool on_number_part( string_view s, error_code& ec );
  168. /// Called when a signed integer is parsed.
  169. ///
  170. /// @return `true` on success.
  171. /// @param i The value
  172. /// @param s The remaining characters
  173. /// @param ec Set to the error, if any occurred.
  174. ///
  175. bool on_int64( int64_t i, string_view s, error_code& ec );
  176. /// Called when an unsigned integer is parsed.
  177. ///
  178. /// @return `true` on success.
  179. /// @param u The value
  180. /// @param s The remaining characters
  181. /// @param ec Set to the error, if any occurred.
  182. ///
  183. bool on_uint64( uint64_t u, string_view s, error_code& ec );
  184. /// Called when a double is parsed.
  185. ///
  186. /// @return `true` on success.
  187. /// @param d The value
  188. /// @param s The remaining characters
  189. /// @param ec Set to the error, if any occurred.
  190. ///
  191. bool on_double( double d, string_view s, error_code& ec );
  192. /// Called when a boolean is parsed.
  193. ///
  194. /// @return `true` on success.
  195. /// @param b The value
  197. /// @param ec Set to the error, if any occurred.
  198. ///
  199. bool on_bool( bool b, error_code& ec );
  200. /// Called when a null is parsed.
  201. ///
  202. /// @return `true` on success.
  203. /// @param ec Set to the error, if any occurred.
  204. ///
  205. bool on_null( error_code& ec );
  206. /// Called with characters corresponding to part of the current comment.
  207. ///
  208. /// @return `true` on success.
  209. /// @param s The partial characters.
  210. /// @param ec Set to the error, if any occurred.
  211. ///
  212. bool on_comment_part( string_view s, error_code& ec );
  213. /// Called with the last characters corresponding to the current comment.
  214. ///
  215. /// @return `true` on success.
  216. /// @param s The remaining characters
  217. /// @param ec Set to the error, if any occurred.
  218. ///
  219. bool on_comment( string_view s, error_code& ec );
  220. };
  221. @endcode
  222. @see
  223. @ref parse,
  224. @ref stream_parser,
  225. [Validating parser example](../../doc/html/json/examples.html#json.examples.validate).
  226. @headerfile <boost/json/basic_parser.hpp>
  227. */
  228. template<class Handler>
  229. class basic_parser
  230. {
  // Identifies a position inside the parsing routines. The private
  // suspend / maybe_suspend / suspend_or_fail members of this class each
  // take a `state` argument.
  // NOTE(review): presumably the value is saved so that a later call to
  // write_some can resume from that position, and the prefixes group the
  // states by construct (doc = document, com = comment, lit = literal,
  // str = string, sur = surrogate pair, obj = object, arr = array,
  // num/exp = number, val = value) -- confirm in basic_parser_impl.hpp.
  // The underlying type is `char`, so each value occupies one byte.
  231. enum class state : char
  232. {
  233. doc1, doc3,
  234. com1, com2, com3, com4,
  235. lit1,
  236. str1, str2, str3, str4,
  237. str5, str6, str7, str8,
  238. sur1, sur2, sur3,
  239. sur4, sur5, sur6,
  240. obj1, obj2, obj3, obj4,
  241. obj5, obj6, obj7, obj8,
  242. obj9, obj10, obj11,
  243. arr1, arr2, arr3,
  244. arr4, arr5, arr6,
  245. num1, num2, num3, num4,
  246. num5, num6, num7, num8,
  247. exp1, exp2, exp3,
  248. val1, val2, val3
  249. };
  // Intermediate pieces of a number while it is being parsed. The parser
  // keeps one instance in the `num_` member, and overloads of suspend()
  // and maybe_suspend() accept a `const number&`.
  // NOTE(review): the per-field notes below are inferred from the field
  // names only -- confirm against parse_number in basic_parser_impl.hpp.
  250. struct number
  251. {
  252. uint64_t mant; // presumably the accumulated mantissa digits
  253. int bias; // presumably a decimal-exponent adjustment for the mantissa
  254. int exp; // presumably the explicit exponent value
  255. bool frac; // presumably set when the exponent is negative
  256. bool neg; // presumably set when the number is negative
  257. };
  // Forward declaration only; the definition is not in this header.
  // Its template parameters mirror the first two template parameters of
  // the parse_number member (StackEmpty_, First_).
  258. template< bool StackEmpty_, char First_ >
  259. struct parse_number_helper;
  // ---- data members ----
  // The handler object owned by the parser; handler() returns a
  // reference to it.
  260. // optimization: must come first
  261. Handler h_;
  // Number currently being assembled (see struct number).
  262. number num_;
  // Error code reported by last_error().
  263. system::error_code ec_;
  // NOTE(review): presumably holds saved `state` values (and associated
  // data) across write_some calls -- confirm in basic_parser_impl.hpp.
  264. detail::stack st_;
  265. detail::utf8_sequence seq_;
  // NOTE(review): presumably the two UTF-16 code units of an escaped
  // surrogate pair (cf. the sur1..sur6 states) -- confirm.
  266. unsigned u1_;
  267. unsigned u2_;
  268. bool more_; // false for final buffer
  // Value returned by done().
  269. bool done_ = false; // true on complete parse
  270. bool clean_ = true; // write_some exited cleanly
  // No default initializer here (nor on more_, u1_, u2_): these are
  // expected to be assigned before they are read.
  271. const char* end_;
  272. detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_;
  273. parse_options opt_;
  // This initializer reads opt_, so opt_ must stay declared before
  // depth_ (members are initialized in declaration order).
  274. // how many levels deeper the parser can go
  275. std::size_t depth_ = opt_.max_depth;
  // NOTE(review): presumably which literal is being matched and how far
  // into it the match has progressed (cf. the lit1 state) -- confirm.
  276. unsigned char cur_lit_ = 0;
  277. unsigned char lit_offset_ = 0;
  // Private helpers. Only the declarations are visible here; as the class
  // documentation notes, the function definitions come from
  // <boost/json/basic_parser_impl.hpp>.
  // NOTE(review): the purpose of each helper is not visible from this
  // header -- consult the implementation file before relying on them.
  278. inline void reserve();
  279. inline const char* sentinel();
  // Takes the stream wrapper by const reference and reports a bool.
  280. inline bool incomplete(
  281. const detail::const_stream_wrapper& cs);
  // Out-of-line helpers: every declaration below is marked BOOST_NOINLINE
  // and returns `const char*`. The suspend* overloads take a `state`
  // (optionally with a count `n` or a `number`); the fail overloads take
  // the input position and are noexcept. Definitions are not in this
  // header.
  // NOTE(review): presumably suspend* record where parsing stopped so it
  // can be resumed later, and fail* record an error -- confirm in
  // basic_parser_impl.hpp.
  // The Intel-compiler guard disables warning 2196 for this group and
  // restores the previous warning state afterwards.
  // NOTE(review): presumably the warning is triggered by combining
  // BOOST_NOINLINE with `inline` -- confirm.
  282. #ifdef __INTEL_COMPILER
  283. #pragma warning push
  284. #pragma warning disable 2196
  285. #endif
  286. BOOST_NOINLINE
  287. inline
  288. const char*
  289. suspend_or_fail(state st);
  290. BOOST_NOINLINE
  291. inline
  292. const char*
  293. suspend_or_fail(
  294. state st,
  295. std::size_t n);
  // Private overloads of fail; the public fail(system::error_code) is
  // declared separately in the public section.
  296. BOOST_NOINLINE
  297. inline
  298. const char*
  299. fail(const char* p) noexcept;
  300. BOOST_NOINLINE
  301. inline
  302. const char*
  303. fail(
  304. const char* p,
  305. error ev,
  306. source_location const* loc) noexcept;
  307. BOOST_NOINLINE
  308. inline
  309. const char*
  310. maybe_suspend(
  311. const char* p,
  312. state st);
  313. BOOST_NOINLINE
  314. inline
  315. const char*
  316. maybe_suspend(
  317. const char* p,
  318. state st,
  319. std::size_t n);
  320. BOOST_NOINLINE
  321. inline
  322. const char*
  323. maybe_suspend(
  324. const char* p,
  325. state st,
  326. const number& num);
  327. BOOST_NOINLINE
  328. inline
  329. const char*
  330. suspend(
  331. const char* p,
  332. state st);
  333. BOOST_NOINLINE
  334. inline
  335. const char*
  336. suspend(
  337. const char* p,
  338. state st,
  339. const number& num);
  340. #ifdef __INTEL_COMPILER
  341. #pragma warning pop
  342. #endif
  // Parsing routines, declared per JSON construct. Each takes the current
  // input position `p` and returns a `const char*`; definitions are not
  // in this header.
  // Some options are passed as std::integral_constant tag arguments so
  // that they are template parameters (StackEmpty_, AllowComments_,
  // IsKey_, Literal, First_, Numbers_). The commented-out template
  // parameters and tag types (Terminal_, AllowTrailing_, AllowBadUTF8_)
  // mark options that are passed as plain runtime `bool` arguments
  // instead.
  343. template<bool StackEmpty_/*, bool Terminal_*/>
  344. const char* parse_comment(const char* p,
  345. std::integral_constant<bool, StackEmpty_> stack_empty,
  346. /*std::integral_constant<bool, Terminal_>*/ bool terminal);
  347. template<bool StackEmpty_>
  348. const char* parse_document(const char* p,
  349. std::integral_constant<bool, StackEmpty_> stack_empty);
  350. template<bool StackEmpty_, bool AllowComments_/*,
  351. bool AllowTrailing_, bool AllowBadUTF8_*/>
  352. const char* parse_value(const char* p,
  353. std::integral_constant<bool, StackEmpty_> stack_empty,
  354. std::integral_constant<bool, AllowComments_> allow_comments,
  355. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  356. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  357. bool allow_bad_utf16);
  // Same option arguments as parse_value, but without the stack_empty
  // tag.
  358. template<bool AllowComments_/*,
  359. bool AllowTrailing_, bool AllowBadUTF8_*/>
  360. const char* resume_value(const char* p,
  361. std::integral_constant<bool, AllowComments_> allow_comments,
  362. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  363. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  364. bool allow_bad_utf16);
  365. template<bool StackEmpty_, bool AllowComments_/*,
  366. bool AllowTrailing_, bool AllowBadUTF8_*/>
  367. const char* parse_object(const char* p,
  368. std::integral_constant<bool, StackEmpty_> stack_empty,
  369. std::integral_constant<bool, AllowComments_> allow_comments,
  370. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  371. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  372. bool allow_bad_utf16);
  373. template<bool StackEmpty_, bool AllowComments_/*,
  374. bool AllowTrailing_, bool AllowBadUTF8_*/>
  375. const char* parse_array(const char* p,
  376. std::integral_constant<bool, StackEmpty_> stack_empty,
  377. std::integral_constant<bool, AllowComments_> allow_comments,
  378. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  379. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
  380. bool allow_bad_utf16);
  381. template<int Literal>
  382. const char* parse_literal(const char* p,
  383. std::integral_constant<int, Literal> literal);
  // IsKey_ selects between key and string handling at compile time.
  384. template<bool StackEmpty_, bool IsKey_>
  385. const char* parse_string(const char* p,
  386. std::integral_constant<bool, StackEmpty_> stack_empty,
  387. std::integral_constant<bool, IsKey_> is_key,
  388. bool allow_bad_utf8,
  389. bool allow_bad_utf16);
  // `total` is taken by non-const reference, so it may be modified by
  // the call.
  390. template<bool StackEmpty_>
  391. const char* parse_escaped(
  392. const char* p,
  393. std::size_t& total,
  394. std::integral_constant<bool, StackEmpty_> stack_empty,
  395. bool is_key,
  396. bool allow_bad_utf16);
  397. template<bool StackEmpty_, char First_, number_precision Numbers_>
  398. const char* parse_number(const char* p,
  399. std::integral_constant<bool, StackEmpty_> stack_empty,
  400. std::integral_constant<char, First_> first,
  401. std::integral_constant<number_precision, Numbers_> numbers);
  // Returns the configured maximum depth minus depth_. Since depth_ is
  // the number of levels the parser can still descend, the result is the
  // number of levels currently in use.
  402. // intentionally private
  403. std::size_t
  404. depth() const noexcept
  405. {
  406. return opt_.max_depth - depth_;
  407. }
  408. public:
  // A parser cannot be copied. Because the copy operations are
  // user-declared, the compiler does not generate implicit move
  // operations either, so a parser is not movable.
  409. /// Copy constructor (deleted)
  410. basic_parser(
  411. basic_parser const&) = delete;
  412. /// Copy assignment (deleted)
  413. basic_parser& operator=(
  414. basic_parser const&) = delete;
  // Defaulted: no hand-written cleanup is performed here; each data
  // member is destroyed by its own destructor.
  415. /** Destructor.
  416. All dynamically allocated internal memory is freed.
  417. @par Effects
  418. @code
  419. this->handler().~Handler()
  420. @endcode
  421. @par Complexity
  422. Same as `~Handler()`.
  423. @par Exception Safety
  424. Same as `~Handler()`.
  425. */
  426. ~basic_parser() = default;
  // Declared `explicit`, so a lone parse_options value never converts
  // implicitly to a parser. Only the declaration is present in this
  // header.
  427. /** Constructor.
  428. This function constructs the parser with
  429. the specified options, with any additional
  430. arguments forwarded to the handler's constructor.
  431. @par Complexity
  432. Same as `Handler( std::forward< Args >( args )... )`.
  433. @par Exception Safety
  434. Same as `Handler( std::forward< Args >( args )... )`.
  435. @param opt Configuration settings for the parser.
  436. If this structure is default constructed, the
  437. parser will accept only standard JSON.
  438. @param args Optional additional arguments
  439. forwarded to the handler's constructor.
  440. */
  441. template<class... Args>
  442. explicit
  443. basic_parser(
  444. parse_options const& opt,
  445. Args&&... args);
  446. /** Return a reference to the handler.
  447. This function provides access to the constructed
  448. instance of the handler owned by the parser.
  449. @par Complexity
  450. Constant.
  451. @par Exception Safety
  452. No-throw guarantee.
  @return A mutable reference to the handler object stored
  inside this parser.
  453. */
  454. Handler&
  455. handler() noexcept
  456. {
  457. return h_;
  458. }
  459. /** Return a const reference to the handler.
  460. This function provides read-only access to the constructed
  461. instance of the handler owned by the parser.
  462. @par Complexity
  463. Constant.
  464. @par Exception Safety
  465. No-throw guarantee.
  @return A const reference to the handler object stored
  inside this parser.
  466. */
  467. Handler const&
  468. handler() const noexcept
  469. {
  470. return h_;
  471. }
  472. /** Return the last error.
  473. This returns the last error code which
  474. was generated in the most recent call
  475. to @ref write_some.
  476. @par Complexity
  477. Constant.
  478. @par Exception Safety
  479. No-throw guarantee.
  @return A copy of the error code stored in the parser.
  480. */
  481. system::error_code
  482. last_error() const noexcept
  483. {
  484. return ec_;
  485. }
  486. /** Return true if a complete JSON has been parsed.
  487. This function returns `true` when all of these
  488. conditions are met:
  489. @li A complete serialized JSON has been
  490. presented to the parser, and
  491. @li No error or exception has occurred since the
  492. parser was constructed, or since the last call
  493. to @ref reset.
  494. @par Complexity
  495. Constant.
  496. @par Exception Safety
  497. No-throw guarantee.
  @return The current value of the parser's completion flag.
  498. */
  499. bool
  500. done() const noexcept
  501. {
  502. return done_;
  503. }
  504. /** Reset the state, to parse a new document.
  505. This function discards the current parsing
  506. state, to prepare for parsing a new document.
  507. Dynamically allocated temporary memory used
  508. by the implementation is not deallocated.
  After an error or exception in @ref write_some,
  this function must be called before the parser
  can be used to parse a new JSON.
  509. @par Complexity
  510. Constant.
  511. @par Exception Safety
  512. No-throw guarantee.
  513. */
  514. void
  515. reset() noexcept;
  // Public overload taking an error code; the private fail overloads
  // declared earlier in the class take an input position instead.
  516. /** Indicate a parsing failure.
  517. This changes the state of the parser to indicate
  518. that the parse has failed. A parser implementation
  519. can use this to fail the parser if needed due to
  520. external inputs.
  521. @note
  522. If `!ec`, the stored error code is unspecified.
  523. @par Complexity
  524. Constant.
  525. @par Exception Safety
  526. No-throw guarantee.
  527. @param ec The error code to set. If the code does
  528. not indicate failure, an implementation-defined
  529. error code that indicates failure will be stored
  530. instead.
  531. */
  532. void
  533. fail(system::error_code ec) noexcept;
  534. /** Parse some of an input string as JSON, incrementally.
  535. This function parses the JSON in the specified
  536. buffer, calling the handler to emit each SAX
  537. parsing event. The parse proceeds from the
  538. current state, which is at the beginning of a
  539. new JSON or in the middle of the current JSON
  540. if any characters were already parsed.
  541. \n
  542. The characters in the buffer are processed
  543. starting from the beginning, until one of the
  544. following conditions is met:
  545. @li All of the characters in the buffer
  546. have been parsed, or
  547. @li Some of the characters in the buffer
  548. have been parsed and the JSON is complete, or
  549. @li A parsing error occurs.
  550. The supplied buffer does not need to contain the
  551. entire JSON. Subsequent calls can provide more
  552. serialized data, allowing JSON to be processed
  553. incrementally. The end of the serialized JSON
  554. can be indicated by passing `more = false`.
  \n
  The two overloads take the same arguments and differ
  only in the type of `ec`: `system::error_code` or
  `std::error_code`.
  555. @par Complexity
  556. Linear in `size`.
  557. @par Exception Safety
  558. Basic guarantee.
  559. Calls to the handler may throw.
  560. Upon error or exception, subsequent calls will
  561. fail until @ref reset is called to parse a new JSON.
  562. @return The number of characters successfully
  563. parsed, which may be smaller than `size`.
  564. @param more `true` if there are possibly more
  565. buffers in the current JSON, otherwise `false`.
  566. @param data A pointer to a buffer of `size`
  567. characters to parse.
  568. @param size The number of characters pointed to
  569. by `data`.
  570. @param ec Set to the error, if any occurred.
  571. */
  572. /** @{ */
  573. std::size_t
  574. write_some(
  575. bool more,
  576. char const* data,
  577. std::size_t size,
  578. system::error_code& ec);
  // Same contract as above, reporting through a std::error_code.
  579. std::size_t
  580. write_some(
  581. bool more,
  582. char const* data,
  583. std::size_t size,
  584. std::error_code& ec);
  585. /** @} */
  586. };
  587. } // namespace json
  588. } // namespace boost
  589. #endif