parser.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. // Copyright 2023 Matt Borland
  2. // Distributed under the Boost Software License, Version 1.0.
  3. // https://www.boost.org/LICENSE_1_0.txt
  4. #ifndef BOOST_CHARCONV_DETAIL_PARSER_HPP
  5. #define BOOST_CHARCONV_DETAIL_PARSER_HPP
  6. #include <boost/charconv/detail/config.hpp>
  7. #include <boost/charconv/detail/from_chars_result.hpp>
  8. #include <boost/charconv/detail/from_chars_integer_impl.hpp>
  9. #include <boost/charconv/detail/integer_search_trees.hpp>
  10. #include <boost/charconv/limits.hpp>
  11. #include <boost/charconv/chars_format.hpp>
  12. #include <system_error>
  13. #include <type_traits>
  14. #include <limits>
  15. #include <cerrno>
  16. #include <cstdint>
  17. #include <cstring>
  18. #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
  19. # pragma GCC diagnostic push
  20. # pragma GCC diagnostic ignored "-Wmissing-field-initializers"
  21. #endif
  22. namespace boost { namespace charconv { namespace detail {
  23. inline bool is_integer_char(char c) noexcept
  24. {
  25. return (c >= '0') && (c <= '9');
  26. }
  27. inline bool is_hex_char(char c) noexcept
  28. {
  29. return is_integer_char(c) || (((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')));
  30. }
  31. inline bool is_delimiter(char c, chars_format fmt) noexcept
  32. {
  33. if (fmt != chars_format::hex)
  34. {
  35. return !is_integer_char(c) && c != 'e' && c != 'E';
  36. }
  37. return !is_hex_char(c) && c != 'p' && c != 'P';
  38. }
  39. inline from_chars_result from_chars_dispatch(const char* first, const char* last, std::uint64_t& value, int base) noexcept
  40. {
  41. return boost::charconv::detail::from_chars(first, last, value, base);
  42. }
  43. inline from_chars_result from_chars_dispatch(const char* first, const char* last, uint128& value, int base) noexcept
  44. {
  45. return boost::charconv::detail::from_chars128(first, last, value, base);
  46. }
  47. #ifdef BOOST_CHARCONV_HAS_INT128
  48. inline from_chars_result from_chars_dispatch(const char* first, const char* last, boost::uint128_type& value, int base) noexcept
  49. {
  50. return boost::charconv::detail::from_chars128(first, last, value, base);
  51. }
  52. #endif
  53. template <typename Unsigned_Integer, typename Integer>
  54. inline from_chars_result parser(const char* first, const char* last, bool& sign, Unsigned_Integer& significand, Integer& exponent, chars_format fmt = chars_format::general) noexcept
  55. {
  56. if (first > last)
  57. {
  58. return {first, std::errc::invalid_argument};
  59. }
  60. auto next = first;
  61. bool all_zeros = true;
  62. // First extract the sign
  63. if (*next == '-')
  64. {
  65. sign = true;
  66. ++next;
  67. }
  68. else if (*next == '+')
  69. {
  70. return {next, std::errc::invalid_argument};
  71. }
  72. else
  73. {
  74. sign = false;
  75. }
  76. // Handle non-finite values
  77. // Stl allows for string like "iNf" to return inf
  78. //
  79. // This is nested ifs rather than a big one-liner to ensure that once we hit an invalid character
  80. // or an end of buffer we return the correct value of next
  81. if (next != last && (*next == 'i' || *next == 'I'))
  82. {
  83. ++next;
  84. if (next != last && (*next == 'n' || *next == 'N'))
  85. {
  86. ++next;
  87. if (next != last && (*next == 'f' || *next == 'F'))
  88. {
  89. significand = 0;
  90. return {next, std::errc::value_too_large};
  91. }
  92. }
  93. return {next, std::errc::invalid_argument};
  94. }
  95. else if (next != last && (*next == 'n' || *next == 'N'))
  96. {
  97. ++next;
  98. if (next != last && (*next == 'a' || *next == 'A'))
  99. {
  100. ++next;
  101. if (next != last && (*next == 'n' || *next == 'N'))
  102. {
  103. ++next;
  104. if (next != last && (*next == '('))
  105. {
  106. ++next;
  107. if (next != last && (*next == 's' || *next == 'S'))
  108. {
  109. significand = 1;
  110. return {next, std::errc::not_supported};
  111. }
  112. else if (next != last && (*next == 'i' || *next == 'I'))
  113. {
  114. significand = 0;
  115. return {next, std::errc::not_supported};
  116. }
  117. }
  118. else
  119. {
  120. significand = 0;
  121. return {next, std::errc::not_supported};
  122. }
  123. }
  124. }
  125. return {next, std::errc::invalid_argument};
  126. }
  127. // Ignore leading zeros (e.g. 00005 or -002.3e+5)
  128. while (next != last && *next == '0')
  129. {
  130. ++next;
  131. }
  132. // If the number is 0 we can abort now
  133. char exp_char;
  134. char capital_exp_char;
  135. if (fmt != chars_format::hex)
  136. {
  137. exp_char = 'e';
  138. capital_exp_char = 'E';
  139. }
  140. else
  141. {
  142. exp_char = 'p';
  143. capital_exp_char = 'P';
  144. }
  145. if (next == last || *next == exp_char || *next == -capital_exp_char)
  146. {
  147. significand = 0;
  148. exponent = 0;
  149. return {next, std::errc()};
  150. }
  151. // Next we get the significand
  152. constexpr std::size_t significand_buffer_size = limits<Unsigned_Integer>::max_chars10 - 1; // Base 10 or 16
  153. char significand_buffer[significand_buffer_size] {};
  154. std::size_t i = 0;
  155. std::size_t dot_position = 0;
  156. Integer extra_zeros = 0;
  157. Integer leading_zero_powers = 0;
  158. const auto char_validation_func = (fmt != boost::charconv::chars_format::hex) ? is_integer_char : is_hex_char;
  159. const int base = (fmt != boost::charconv::chars_format::hex) ? 10 : 16;
  160. while (next != last && char_validation_func(*next) && i < significand_buffer_size)
  161. {
  162. all_zeros = false;
  163. significand_buffer[i] = *next;
  164. ++next;
  165. ++i;
  166. }
  167. bool fractional = false;
  168. if (next == last)
  169. {
  170. // if fmt is chars_format::scientific the e is required
  171. if (fmt == chars_format::scientific)
  172. {
  173. return {first, std::errc::invalid_argument};
  174. }
  175. exponent = 0;
  176. std::size_t offset = i;
  177. from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
  178. switch (r.ec)
  179. {
  180. case std::errc::invalid_argument:
  181. return {first, std::errc::invalid_argument};
  182. case std::errc::result_out_of_range:
  183. return {next, std::errc::result_out_of_range};
  184. default:
  185. return {next, std::errc()};
  186. }
  187. }
  188. else if (*next == '.')
  189. {
  190. ++next;
  191. fractional = true;
  192. dot_position = i;
  193. // Process the fractional part if we have it
  194. //
  195. // if fmt is chars_format::scientific the e is required
  196. // if fmt is chars_format::fixed and not scientific the e is disallowed
  197. // if fmt is chars_format::general (which is scientific and fixed) the e is optional
  198. // If we have the value 0.00001 we can continue to chop zeros and adjust the exponent
  199. // so that we get the useful parts of the fraction
  200. if (all_zeros)
  201. {
  202. while (next != last && *next == '0')
  203. {
  204. ++next;
  205. --leading_zero_powers;
  206. }
  207. if (next == last)
  208. {
  209. return {last, std::errc()};
  210. }
  211. }
  212. while (next != last && char_validation_func(*next) && i < significand_buffer_size)
  213. {
  214. significand_buffer[i] = *next;
  215. ++next;
  216. ++i;
  217. }
  218. }
  219. if (i == significand_buffer_size)
  220. {
  221. // We can not process any more significant figures into the significand so skip to the end
  222. // or the exponent part and capture the additional orders of magnitude for the exponent
  223. bool found_dot = false;
  224. while (next != last && (char_validation_func(*next) || *next == '.'))
  225. {
  226. ++next;
  227. if (!fractional && !found_dot)
  228. {
  229. ++extra_zeros;
  230. }
  231. if (next != last && *next == '.')
  232. {
  233. found_dot = true;
  234. }
  235. }
  236. }
  237. if (next == last || is_delimiter(*next, fmt))
  238. {
  239. if (fmt == chars_format::scientific)
  240. {
  241. return {first, std::errc::invalid_argument};
  242. }
  243. if (dot_position != 0 || fractional)
  244. {
  245. exponent = static_cast<Integer>(dot_position) - static_cast<Integer>(i) + extra_zeros + leading_zero_powers;
  246. }
  247. else
  248. {
  249. exponent = extra_zeros + leading_zero_powers;
  250. }
  251. std::size_t offset = i;
  252. from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
  253. switch (r.ec)
  254. {
  255. case std::errc::invalid_argument:
  256. return {first, std::errc::invalid_argument};
  257. case std::errc::result_out_of_range:
  258. return {next, std::errc::result_out_of_range};
  259. default:
  260. return {next, std::errc()};
  261. }
  262. }
  263. else if (*next == exp_char || *next == capital_exp_char)
  264. {
  265. // Would be a number without a significand e.g. e+03
  266. if (next == first)
  267. {
  268. return {next, std::errc::invalid_argument};
  269. }
  270. ++next;
  271. if (fmt == chars_format::fixed)
  272. {
  273. return {first, std::errc::invalid_argument};
  274. }
  275. exponent = static_cast<Integer>(i - 1);
  276. std::size_t offset = i;
  277. bool round = false;
  278. // If more digits are present than representable in the significand of the target type
  279. // we set the maximum
  280. if (offset > significand_buffer_size)
  281. {
  282. offset = significand_buffer_size - 1;
  283. i = significand_buffer_size;
  284. if (significand_buffer[offset] == '5' ||
  285. significand_buffer[offset] == '6' ||
  286. significand_buffer[offset] == '7' ||
  287. significand_buffer[offset] == '8' ||
  288. significand_buffer[offset] == '9')
  289. {
  290. round = true;
  291. }
  292. }
  293. // If the significand is 0 from chars will return std::errc::invalid_argument because there is nothing in the buffer,
  294. // but it is a valid value. We need to continue parsing to get the correct value of ptr even
  295. // though we know we could bail now.
  296. //
  297. // See GitHub issue #29: https://github.com/cppalliance/charconv/issues/29
  298. if (offset != 0)
  299. {
  300. from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
  301. switch (r.ec)
  302. {
  303. case std::errc::invalid_argument:
  304. return {first, std::errc::invalid_argument};
  305. case std::errc::result_out_of_range:
  306. return {next, std::errc::result_out_of_range};
  307. default:
  308. break;
  309. }
  310. if (round)
  311. {
  312. significand += 1;
  313. }
  314. }
  315. }
  316. else
  317. {
  318. return {first, std::errc::invalid_argument};
  319. }
  320. // Finally we get the exponent
  321. constexpr std::size_t exponent_buffer_size = 6; // Float128 min exp is −16382
  322. char exponent_buffer[exponent_buffer_size] {};
  323. const auto significand_digits = i;
  324. i = 0;
  325. // Get the sign first
  326. if (next != last && *next == '-')
  327. {
  328. exponent_buffer[i] = *next;
  329. ++next;
  330. ++i;
  331. }
  332. else if (next != last && *next == '+')
  333. {
  334. ++next;
  335. }
  336. // Next strip any leading zeros
  337. while (next != last && *next == '0')
  338. {
  339. ++next;
  340. }
  341. // Process the significant values
  342. while (next != last && is_integer_char(*next) && i < exponent_buffer_size)
  343. {
  344. exponent_buffer[i] = *next;
  345. ++next;
  346. ++i;
  347. }
  348. // If the exponent can't fit in the buffer the number is not representable
  349. if (next != last && i == exponent_buffer_size)
  350. {
  351. return {next, std::errc::result_out_of_range};
  352. }
  353. // If the exponent was e+00 or e-00
  354. if (i == 0 || (i == 1 && exponent_buffer[0] == '-'))
  355. {
  356. if (fractional)
  357. {
  358. exponent = static_cast<Integer>(dot_position - significand_digits);
  359. }
  360. else
  361. {
  362. exponent = extra_zeros;
  363. }
  364. return {next, std::errc()};
  365. }
  366. const auto r = from_chars(exponent_buffer, exponent_buffer + i, exponent);
  367. exponent += leading_zero_powers;
  368. switch (r.ec)
  369. {
  370. case std::errc::invalid_argument:
  371. return {first, std::errc::invalid_argument};
  372. case std::errc::result_out_of_range:
  373. return {next, std::errc::result_out_of_range};
  374. default:
  375. if (fractional)
  376. {
  377. // Need to take the offset from 1.xxx because compute_floatXXX assumes the significand is an integer
  378. // so the exponent is off by the number of digits in the significand - 1
  379. if (fmt == chars_format::hex)
  380. {
  381. // In hex the number of digits parsed is possibly less than the number of digits in base10
  382. exponent -= num_digits(significand) - static_cast<Integer>(dot_position);
  383. }
  384. else
  385. {
  386. exponent -= static_cast<Integer>(significand_digits - dot_position);
  387. }
  388. }
  389. else
  390. {
  391. exponent += extra_zeros;
  392. }
  393. return {next, std::errc()};
  394. }
  395. }
  396. }}} // Namespaces
  397. #if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
  398. # pragma GCC diagnostic pop
  399. #endif
  400. #endif // BOOST_CHARCONV_DETAIL_PARSER_HPP