format_sql.ipp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. //
  2. // Copyright (c) 2019-2024 Ruben Perez Hidalgo (rubenperez038 at gmail dot com)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. //
  7. #ifndef BOOST_MYSQL_IMPL_FORMAT_SQL_IPP
  8. #define BOOST_MYSQL_IMPL_FORMAT_SQL_IPP
  9. #include <boost/mysql/blob_view.hpp>
  10. #include <boost/mysql/character_set.hpp>
  11. #include <boost/mysql/client_errc.hpp>
  12. #include <boost/mysql/constant_string_view.hpp>
  13. #include <boost/mysql/diagnostics.hpp>
  14. #include <boost/mysql/error_code.hpp>
  15. #include <boost/mysql/field_kind.hpp>
  16. #include <boost/mysql/field_view.hpp>
  17. #include <boost/mysql/format_sql.hpp>
  18. #include <boost/mysql/string_view.hpp>
  19. #include <boost/mysql/detail/access.hpp>
  20. #include <boost/mysql/detail/escape_string.hpp>
  21. #include <boost/mysql/detail/format_sql.hpp>
  22. #include <boost/mysql/detail/output_string.hpp>
  23. #include <boost/mysql/impl/internal/byte_to_hex.hpp>
  24. #include <boost/mysql/impl/internal/call_next_char.hpp>
  25. #include <boost/mysql/impl/internal/dt_to_string.hpp>
  26. #include <boost/charconv/from_chars.hpp>
  27. #include <boost/charconv/to_chars.hpp>
  28. #include <boost/core/detail/string_view.hpp>
  29. #include <boost/system/result.hpp>
  30. #include <boost/system/system_error.hpp>
  31. #include <boost/throw_exception.hpp>
  32. #include <cmath>
  33. #include <cstddef>
  34. #include <limits>
  35. #include <string>
  36. namespace boost {
  37. namespace mysql {
  38. namespace detail {
  39. // Helpers to format fundamental types
  40. inline void append_quoted_identifier(string_view name, format_context_base& ctx)
  41. {
  42. ctx.append_raw("`");
  43. auto& impl = access::get_impl(ctx);
  44. auto ec = detail::escape_string(name, impl.opts, '`', impl.output);
  45. if (ec)
  46. ctx.add_error(ec);
  47. ctx.append_raw("`");
  48. }
  49. template <class T>
  50. void append_int(T integer, format_context_base& ctx)
  51. {
  52. // Make sure our buffer is big enough. 2: sign + digits10 is only 1 below max
  53. constexpr std::size_t buffsize = 32;
  54. static_assert(2 + std::numeric_limits<double>::digits10 < buffsize, "");
  55. char buff[buffsize];
  56. auto res = charconv::to_chars(buff, buff + buffsize, integer);
  57. // Can only fail becuase of buffer being too small
  58. BOOST_ASSERT(res.ec == std::errc());
  59. // Copy
  60. access::get_impl(ctx).output.append(string_view(buff, res.ptr - buff));
  61. }
  62. inline void append_double(double number, format_context_base& ctx)
  63. {
  64. // Make sure our buffer is big enough. 4: sign, radix point, e+
  65. // 3: max exponent digits
  66. constexpr std::size_t buffsize = 32;
  67. static_assert(4 + std::numeric_limits<double>::max_digits10 + 3 < buffsize, "");
  68. // inf and nan are not supported by MySQL
  69. if (std::isinf(number) || std::isnan(number))
  70. {
  71. ctx.add_error(client_errc::unformattable_value);
  72. return;
  73. }
  74. char buff[buffsize];
  75. // We format as scientific to make MySQL understand the number as a double.
  76. // Otherwise, it takes it as a DECIMAL.
  77. auto res = charconv::to_chars(buff, buff + buffsize, number, charconv::chars_format::scientific);
  78. // Can only fail because of buffer being too small
  79. BOOST_ASSERT(res.ec == std::errc());
  80. // Copy
  81. access::get_impl(ctx).output.append(string_view(buff, res.ptr - buff));
  82. }
  83. inline void append_quoted_string(string_view str, format_context_base& ctx)
  84. {
  85. auto& impl = access::get_impl(ctx);
  86. impl.output.append("'");
  87. auto ec = detail::escape_string(str, impl.opts, '\'', impl.output);
  88. if (ec)
  89. ctx.add_error(ec);
  90. impl.output.append("'");
  91. }
  92. inline void append_string(string_view str, string_view format_spec, format_context_base& ctx)
  93. {
  94. // Parse format spec
  95. if (format_spec.size() > 1u)
  96. {
  97. ctx.add_error(client_errc::format_string_invalid_specifier);
  98. return;
  99. }
  100. // No specifier: quoted string
  101. if (format_spec.empty())
  102. return append_quoted_string(str, ctx);
  103. // We got a specifier
  104. switch (format_spec[0])
  105. {
  106. case 'i':
  107. // format as identifier
  108. return append_quoted_identifier(str, ctx);
  109. case 'r':
  110. // append raw SQL
  111. ctx.append_raw(runtime(str));
  112. break;
  113. default: ctx.add_error(client_errc::format_string_invalid_specifier);
  114. }
  115. }
  116. inline void append_blob(blob_view b, format_context_base& ctx)
  117. {
  118. // Blobs have a binary character set, which may include characters
  119. // that are not valid in the current character set. However, escaping
  120. // is always performed using the character_set_connection.
  121. // mysql_real_escape_string escapes multibyte characters with a backslash,
  122. // but this behavior is not documented, so we don't want to rely on it.
  123. // The most reliable way to encode blobs is using hex strings.
  124. // Output string
  125. auto output = access::get_impl(ctx).output;
  126. // We output characters to a temporary buffer, batching append calls
  127. constexpr std::size_t buffer_size = 64;
  128. char buffer[buffer_size]{};
  129. char* it = buffer;
  130. char* const end = buffer + buffer_size;
  131. // Binary string introducer
  132. output.append("x'");
  133. // Serialize contents
  134. for (unsigned char byte : b)
  135. {
  136. // Serialize the byte
  137. it = byte_to_hex(byte, it);
  138. // If we filled the buffer, dump it
  139. if (it == end)
  140. {
  141. output.append({buffer, buffer_size});
  142. it = buffer;
  143. }
  144. }
  145. // Dump anything that didn't fill the buffer
  146. output.append({buffer, static_cast<std::size_t>(it - buffer)});
  147. // Closing quote
  148. ctx.append_raw("'");
  149. }
  150. inline void append_quoted_date(date d, format_context_base& ctx)
  151. {
  152. char buffer[34];
  153. buffer[0] = '\'';
  154. std::size_t sz = detail::date_to_string(d.year(), d.month(), d.day(), span<char, 32>(buffer + 1, 32));
  155. buffer[sz + 1] = '\'';
  156. access::get_impl(ctx).output.append(string_view(buffer, sz + 2));
  157. }
  158. inline void append_quoted_datetime(datetime d, format_context_base& ctx)
  159. {
  160. char buffer[66];
  161. buffer[0] = '\'';
  162. std::size_t sz = detail::datetime_to_string(
  163. d.year(),
  164. d.month(),
  165. d.day(),
  166. d.hour(),
  167. d.minute(),
  168. d.second(),
  169. d.microsecond(),
  170. span<char, 64>(buffer + 1, 64)
  171. );
  172. buffer[sz + 1] = '\'';
  173. access::get_impl(ctx).output.append(string_view(buffer, sz + 2));
  174. }
  175. inline void append_quoted_time(time t, format_context_base& ctx)
  176. {
  177. char buffer[66];
  178. buffer[0] = '\'';
  179. std::size_t sz = time_to_string(t, span<char, 64>(buffer + 1, 64));
  180. buffer[sz + 1] = '\'';
  181. access::get_impl(ctx).output.append(string_view(buffer, sz + 2));
  182. }
  183. inline void append_field_view(
  184. field_view fv,
  185. string_view format_spec,
  186. bool allow_specs,
  187. format_context_base& ctx
  188. )
  189. {
  190. auto kind = fv.kind();
  191. // String types may allow specs
  192. if (allow_specs && kind == field_kind::string)
  193. {
  194. append_string(fv.get_string(), format_spec, ctx);
  195. return;
  196. }
  197. // Reject specifiers if !allow_specs or for other types
  198. if (!format_spec.empty())
  199. {
  200. ctx.add_error(client_errc::format_string_invalid_specifier);
  201. return;
  202. }
  203. // Perform the formatting operation
  204. switch (fv.kind())
  205. {
  206. case field_kind::null: ctx.append_raw("NULL"); return;
  207. case field_kind::int64: return append_int(fv.get_int64(), ctx);
  208. case field_kind::uint64: return append_int(fv.get_uint64(), ctx);
  209. case field_kind::float_:
  210. // float is formatted as double because it's parsed as such
  211. return append_double(fv.get_float(), ctx);
  212. case field_kind::double_: return append_double(fv.get_double(), ctx);
  213. case field_kind::string: return append_quoted_string(fv.get_string(), ctx);
  214. case field_kind::blob: return append_blob(fv.get_blob(), ctx);
  215. case field_kind::date: return append_quoted_date(fv.get_date(), ctx);
  216. case field_kind::datetime: return append_quoted_datetime(fv.get_datetime(), ctx);
  217. case field_kind::time: return append_quoted_time(fv.get_time(), ctx);
  218. default: BOOST_ASSERT(false); return;
  219. }
  220. }
  221. // Helpers for parsing format strings
  // Returns true if c is an ASCII decimal digit ('0' to '9').
  inline bool is_number(char c)
  {
      // Characters below '0' yield a negative difference, which wraps around to a
      // large unsigned value, so a single comparison covers both bounds.
      return static_cast<unsigned>(c - '0') < 10u;
  }
  // Returns true if c may start an identifier: an ASCII letter or an underscore.
  inline bool is_name_start(char c)
  {
      const bool is_lowercase = c >= 'a' && c <= 'z';
      const bool is_uppercase = c >= 'A' && c <= 'Z';
      return is_lowercase || is_uppercase || c == '_';
  }
  // Returns true if c may appear in a format specifier: any character
  // in the range [0x20, 0x7e], with the exception of curly braces.
  inline bool is_format_spec_char(char c)
  {
      // Braces delimit replacement fields, so they can't be part of a spec
      if (c == '{' || c == '}')
          return false;

      // Compare as unsigned so bytes >= 0x80 are rejected regardless of char signedness
      const unsigned char code = static_cast<unsigned char>(c);
      return code >= 0x20 && code <= 0x7e;
  }
  229. class format_state
  230. {
  231. format_context_base& ctx_;
  232. span<const format_arg> args_;
  233. // Borrowed from fmt
  234. // 0: we haven't used any args yet
  235. // -1: we're doing explicit indexing
  236. // >0: we're doing auto indexing
  237. int next_arg_id_{0};
  238. BOOST_ATTRIBUTE_NODISCARD
  239. bool advance(const char*& it, const char* end)
  240. {
  241. std::size_t size = detail::call_next_char(ctx_.impl_.opts.charset, it, end);
  242. if (size == 0)
  243. {
  244. ctx_.add_error(client_errc::format_string_invalid_encoding);
  245. return false;
  246. }
  247. it += size;
  248. return true;
  249. }
  250. bool uses_auto_ids() const noexcept { return next_arg_id_ > 0; }
  251. bool uses_explicit_ids() const noexcept { return next_arg_id_ == -1; }
  252. void do_field(format_arg arg, string_view format_spec)
  253. {
  254. ctx_.format_arg(access::get_impl(arg).value, format_spec);
  255. }
  256. BOOST_ATTRIBUTE_NODISCARD
  257. bool do_indexed_field(int arg_id, string_view format_spec)
  258. {
  259. BOOST_ASSERT(arg_id >= 0);
  260. if (static_cast<std::size_t>(arg_id) >= args_.size())
  261. {
  262. ctx_.add_error(client_errc::format_arg_not_found);
  263. return false;
  264. }
  265. do_field(args_[arg_id], format_spec);
  266. return true;
  267. }
  268. struct arg_id_t
  269. {
  270. enum class type_t
  271. {
  272. none,
  273. integral,
  274. identifier
  275. };
  276. union data_t
  277. {
  278. unsigned short integral;
  279. string_view identifier;
  280. data_t() noexcept : integral{} {}
  281. };
  282. type_t type;
  283. data_t data;
  284. arg_id_t() noexcept : type(type_t::none), data() {}
  285. arg_id_t(unsigned short v) noexcept : type(type_t::integral) { data.integral = v; }
  286. arg_id_t(string_view v) noexcept : type(type_t::identifier) { data.identifier = v; }
  287. };
  288. BOOST_ATTRIBUTE_NODISCARD
  289. static arg_id_t parse_arg_id(const char*& it, const char* format_end)
  290. {
  291. if (is_number(*it))
  292. {
  293. unsigned short field_index = 0;
  294. auto res = charconv::from_chars(it, format_end, field_index);
  295. if (res.ec != std::errc{})
  296. return arg_id_t();
  297. it = res.ptr;
  298. return field_index;
  299. }
  300. else if (is_name_start(*it))
  301. {
  302. const char* name_begin = it;
  303. while (it != format_end && (is_name_start(*it) || is_number(*it)))
  304. ++it;
  305. string_view field_name(name_begin, it);
  306. return field_name;
  307. }
  308. else
  309. {
  310. return arg_id_t();
  311. }
  312. }
  313. BOOST_ATTRIBUTE_NODISCARD
  314. static string_view parse_format_spec(const char*& it, const char* format_end)
  315. {
  316. if (it != format_end && *it == ':')
  317. {
  318. ++it;
  319. const char* first = it;
  320. while (it != format_end && is_format_spec_char(*it))
  321. ++it;
  322. return {first, it};
  323. }
  324. else
  325. {
  326. return string_view();
  327. }
  328. }
  329. BOOST_ATTRIBUTE_NODISCARD
  330. bool parse_field(const char*& it, const char* format_end)
  331. {
  332. // Taken from fmtlib and adapted to our requirements
  333. // it points to the character next to the opening '{'
  334. // replacement_field ::= "{" [arg_id] [":" (format_spec)] "}"
  335. // arg_id ::= integer | identifier
  336. // integer ::= <decimal, unsigned short, parsed by from_chars>
  337. // identifier ::= id_start id_continue*
  338. // id_start ::= "a"..."z" | "A"..."Z" | "_"
  339. // id_continue ::= id_start | digit
  340. // digit ::= "0"..."9"
  341. // format_spec ::= <any character >= 0x20 && <= 0x7e && != "{", "}">
  342. // Parse the ID and spec components
  343. auto arg_id = parse_arg_id(it, format_end);
  344. auto spec = parse_format_spec(it, format_end);
  345. // If we're not at the end on the string, it's a syntax error
  346. if (it == format_end || *it != '}')
  347. {
  348. ctx_.add_error(client_errc::format_string_invalid_syntax);
  349. return false;
  350. }
  351. ++it;
  352. // Process what was parsed
  353. switch (arg_id.type)
  354. {
  355. case arg_id_t::type_t::none: return append_auto_field(spec);
  356. case arg_id_t::type_t::integral: return append_indexed_field(arg_id.data.integral, spec);
  357. case arg_id_t::type_t::identifier: return append_named_field(arg_id.data.identifier, spec);
  358. default: BOOST_ASSERT(false); return false;
  359. }
  360. }
  361. BOOST_ATTRIBUTE_NODISCARD
  362. bool append_named_field(string_view field_name, string_view format_spec)
  363. {
  364. // Find the argument
  365. for (const auto& arg : args_)
  366. {
  367. if (access::get_impl(arg).name == field_name)
  368. {
  369. do_field(arg, format_spec);
  370. return true;
  371. }
  372. }
  373. // Not found
  374. ctx_.add_error(client_errc::format_arg_not_found);
  375. return false;
  376. }
  377. BOOST_ATTRIBUTE_NODISCARD
  378. bool append_indexed_field(int index, string_view format_spec)
  379. {
  380. if (uses_auto_ids())
  381. {
  382. ctx_.add_error(client_errc::format_string_manual_auto_mix);
  383. return false;
  384. }
  385. next_arg_id_ = -1;
  386. return do_indexed_field(index, format_spec);
  387. }
  388. BOOST_ATTRIBUTE_NODISCARD
  389. bool append_auto_field(string_view format_spec)
  390. {
  391. if (uses_explicit_ids())
  392. {
  393. ctx_.add_error(client_errc::format_string_manual_auto_mix);
  394. return false;
  395. }
  396. return do_indexed_field(next_arg_id_++, format_spec);
  397. }
  398. public:
  399. format_state(format_context_base& ctx, span<const format_arg> args) noexcept : ctx_(ctx), args_(args) {}
  400. void format(string_view format_str)
  401. {
  402. // We can use operator++ when we know a character is ASCII. Some charsets
  403. // allow ASCII continuation bytes, so we need to skip the entire character otherwise
  404. auto cur_begin = format_str.data();
  405. auto it = format_str.data();
  406. auto end = format_str.data() + format_str.size();
  407. while (it != end)
  408. {
  409. if (*it == '{')
  410. {
  411. // May be a replacement field or a literal brace. In any case, dump accumulated output
  412. ctx_.impl_.output.append({cur_begin, it});
  413. ++it;
  414. if (it == end)
  415. {
  416. // If the string ends here, it's en error
  417. ctx_.add_error(client_errc::format_string_invalid_syntax);
  418. return;
  419. }
  420. else if (*it == '{')
  421. {
  422. // A double brace is the escaped form of '{'
  423. ctx_.append_raw("{");
  424. ++it;
  425. }
  426. else
  427. {
  428. // It's a replacement field. Process it
  429. if (!parse_field(it, end))
  430. return;
  431. }
  432. cur_begin = it;
  433. }
  434. else if (*it == '}')
  435. {
  436. // A lonely } is only legal as a escape curly brace (i.e. }})
  437. ctx_.impl_.output.append({cur_begin, it});
  438. ++it;
  439. if (it == end || *it != '}')
  440. {
  441. ctx_.add_error(client_errc::format_string_invalid_syntax);
  442. return;
  443. }
  444. ctx_.impl_.output.append("}");
  445. ++it;
  446. cur_begin = it;
  447. }
  448. else
  449. {
  450. if (!advance(it, end))
  451. return;
  452. }
  453. }
  454. // Dump any remaining SQL
  455. ctx_.impl_.output.append({cur_begin, end});
  456. }
  457. };
  458. } // namespace detail
  459. } // namespace mysql
  460. } // namespace boost
  461. void boost::mysql::format_context_base::format_arg(detail::formattable_ref_impl arg, string_view format_spec)
  462. {
  463. switch (arg.type)
  464. {
  465. case detail::formattable_ref_impl::type_t::field:
  466. detail::append_field_view(arg.data.fv, format_spec, false, *this);
  467. break;
  468. case detail::formattable_ref_impl::type_t::field_with_specs:
  469. detail::append_field_view(arg.data.fv, format_spec, true, *this);
  470. break;
  471. case detail::formattable_ref_impl::type_t::fn_and_ptr:
  472. if (!arg.data.custom.format_fn(arg.data.custom.obj, format_spec.begin(), format_spec.end(), *this))
  473. {
  474. add_error(client_errc::format_string_invalid_specifier);
  475. }
  476. break;
  477. default: BOOST_ASSERT(false);
  478. }
  479. }
  480. void boost::mysql::format_sql_to(
  481. format_context_base& ctx,
  482. constant_string_view format_str,
  483. std::initializer_list<format_arg> args
  484. )
  485. {
  486. detail::format_state(ctx, {args.begin(), args.end()}).format(format_str.get());
  487. }
  488. std::string boost::mysql::format_sql(
  489. format_options opts,
  490. constant_string_view format_str,
  491. std::initializer_list<format_arg> args
  492. )
  493. {
  494. format_context ctx(opts);
  495. format_sql_to(ctx, format_str, args);
  496. return std::move(ctx).get().value();
  497. }
  498. std::pair<bool, boost::mysql::string_view> boost::mysql::detail::parse_range_specifiers(
  499. const char* spec_begin,
  500. const char* spec_end
  501. )
  502. {
  503. // range_format_spec ::= [":" [underlying_spec]]
  504. // Example: {::i} => format an array of strings as identifiers
  505. // Empty: no specifiers
  506. if (spec_begin == spec_end)
  507. return {true, {}};
  508. // If the first character is not a ':', the spec is invalid.
  509. if (*spec_begin != ':')
  510. return {false, {}};
  511. ++spec_begin;
  512. // Return the rest of the range
  513. return {
  514. true,
  515. {spec_begin, spec_end}
  516. };
  517. }
  518. #endif