123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
- //
- // Copyright (c) 2019-2024 Ruben Perez Hidalgo (rubenperez038 at gmail dot com)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- //
- #ifndef BOOST_MYSQL_IMPL_CHARACTER_SET_IPP
- #define BOOST_MYSQL_IMPL_CHARACTER_SET_IPP
- #pragma once
- #include <boost/mysql/character_set.hpp>
- #include <boost/assert.hpp>
namespace boost {
namespace mysql {
namespace detail {

// Checks whether byte lies within the closed interval [lower, upper].
// Both bounds are inclusive. If lower > upper, the interval is empty
// and the function returns false for every byte.
inline bool in_range(unsigned char byte, unsigned char lower, unsigned char upper)
{
    // De Morgan form of (byte >= lower && byte <= upper)
    return !(byte < lower || upper < byte);
}

}  // namespace detail
}  // namespace mysql
}  // namespace boost
- std::size_t boost::mysql::detail::next_char_utf8mb4(span<const unsigned char> input)
- {
- // s[0] s[1] s[2] s[3] comment
- // 00-7F ascii
- // 80-c1 invalid
- // c2-df 80-bf 2byte
- // e0 a0-bf 80-bf 3byte, case 1
- // e1-ec 80-bf 80-bf 3byte, case 2
- // ed 80-9f 80-bf 3byte, case 3 (surrogates)
- // ee-ef 80-bf 80-bf 3byte, case 2
- // f0 90-bf 80-bf 80-bf 4byte, case 1
- // f1-f3 80-bf 80-bf 80-bf 4byte, case 2
- // f4 80-8f 80-bf 80-bf 4byte, case 3
- BOOST_ASSERT(!input.empty());
- auto first_char = input.front();
- BOOST_ASSERT(first_char >= 0x80); // ascii range covered by call_next_char
- if (first_char < 0xc2)
- {
- return 0;
- }
- else if (first_char < 0xe0)
- {
- return (input.size() < 2u || !in_range(input[1], 0x80, 0xbf)) ? 0 : 2;
- }
- else if (first_char == 0xe0)
- {
- return (input.size() < 3u || !in_range(input[1], 0xa0, 0xbf) || !in_range(input[2], 0x80, 0xbf)) ? 0
- : 3;
- }
- else if (first_char == 0xed)
- {
- return (input.size() < 3u || !in_range(input[1], 0x80, 0x9f) || !in_range(input[2], 0x80, 0xbf)) ? 0
- : 3;
- }
- else if (first_char <= 0xef)
- {
- // Includes e1-ec and ee-ef
- return (input.size() < 3u || !in_range(input[1], 0x80, 0xbf) || !in_range(input[2], 0x80, 0xbf)) ? 0
- : 3;
- }
- else if (first_char == 0xf0)
- {
- return (input.size() < 4u || !in_range(input[1], 0x90, 0xbf) || !in_range(input[2], 0x80, 0xbf) ||
- !in_range(input[3], 0x80, 0xbf))
- ? 0
- : 4;
- }
- else if (first_char <= 0xf3)
- {
- return (input.size() < 4u || !in_range(input[1], 0x80, 0xbf) || !in_range(input[2], 0x80, 0xbf) ||
- !in_range(input[3], 0x80, 0xbf))
- ? 0
- : 4;
- }
- else if (first_char == 0xf4)
- {
- return (input.size() < 4u || !in_range(input[1], 0x80, 0x8f) || !in_range(input[2], 0x80, 0xbf) ||
- !in_range(input[3], 0x80, 0xbf))
- ? 0
- : 4;
- }
- else
- {
- return 0;
- }
- }
- #endif
|