/*============================================================================= Copyright (c) 2001-2011 Joel de Guzman Copyright (c) 2023 Nikita Kniazev Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) ==============================================================================*/ #if !defined(BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM) #define BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM #if defined(_MSC_VER) #pragma once #endif #include #include #include #include namespace boost { namespace spirit { typedef ::boost::uint32_t ucs4_char; typedef char utf8_char; typedef std::basic_string ucs4_string; typedef std::basic_string utf8_string; namespace detail { inline void utf8_put_encode(utf8_string& out, ucs4_char x) { // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90 if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul))) x = 0xFFFDul; // Table 3-6. UTF-8 Bit Distribution if (x < 0x80ul) { out.push_back(static_cast(x)); } else if (x < 0x800ul) { out.push_back(static_cast(0xC0ul + (x >> 6))); out.push_back(static_cast(0x80ul + (x & 0x3Ful))); } else if (x < 0x10000ul) { out.push_back(static_cast(0xE0ul + (x >> 12))); out.push_back(static_cast(0x80ul + ((x >> 6) & 0x3Ful))); out.push_back(static_cast(0x80ul + (x & 0x3Ful))); } else { out.push_back(static_cast(0xF0ul + (x >> 18))); out.push_back(static_cast(0x80ul + ((x >> 12) & 0x3Ful))); out.push_back(static_cast(0x80ul + ((x >> 6) & 0x3Ful))); out.push_back(static_cast(0x80ul + (x & 0x3Ful))); } } } template inline utf8_string to_utf8(Char value) { utf8_string result; typedef typename make_unsigned::type UChar; detail::utf8_put_encode(result, static_cast(value)); return result; } template inline utf8_string to_utf8(Char const* str) { utf8_string result; typedef typename make_unsigned::type UChar; while (*str) detail::utf8_put_encode(result, static_cast(*str++)); return result; } template inline utf8_string to_utf8(std::basic_string const& str) { utf8_string result; typedef typename make_unsigned::type UChar; for (Char const* ptr = str.data(), * end = ptr + str.size(); ptr < end; ++ptr) detail::utf8_put_encode(result, static_cast(*ptr)); return result; } // Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2 inline utf8_string to_utf8(wchar_t value) { utf8_string result; detail::utf8_put_encode(result, static_cast::type>(value)); return result; } namespace detail { inline ucs4_char decode_utf16(wchar_t const*& s) { typedef make_unsigned::type uwchar_t; uwchar_t x(*s); if (x < 0xD800ul || x > 0xDFFFul) return x; // expected high-surrogate if (BOOST_UNLIKELY((x >> 10) != 0x36ul)) return 0xFFFDul; uwchar_t y(*++s); // expected low-surrogate if (BOOST_UNLIKELY((y >> 10) != 0x37ul)) return 0xFFFDul; return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul; } } inline utf8_string to_utf8(wchar_t const* str) { utf8_string result; for (ucs4_char c; (c = detail::decode_utf16(str)) != ucs4_char(); ++str) detail::utf8_put_encode(result, c); return result; } template inline utf8_string to_utf8(std::basic_string const& str) { return to_utf8(str.c_str()); } #endif }} #endif