encoding_utf.hpp 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. // Copyright (c) 2022-2024 Alexander Grund
  4. //
  5. // Distributed under the Boost Software License, Version 1.0.
  6. // https://www.boost.org/LICENSE_1_0.txt
  7. #ifndef BOOST_LOCALE_ENCODING_UTF_HPP_INCLUDED
  8. #define BOOST_LOCALE_ENCODING_UTF_HPP_INCLUDED
  9. #include <boost/locale/detail/allocator_traits.hpp>
  10. #include <boost/locale/encoding_errors.hpp>
  11. #include <boost/locale/utf.hpp>
  12. #include <boost/locale/util/string.hpp>
  13. #include <iterator>
  14. #include <memory>
  15. #include <type_traits>
  16. #ifdef BOOST_MSVC
  17. # pragma warning(push)
  18. # pragma warning(disable : 4275 4251 4231 4660)
  19. #endif
  20. namespace boost { namespace locale { namespace conv {
  21. /// \addtogroup codepage
  22. ///
  23. /// @{
  24. /// Convert a Unicode text in range [begin,end) to other Unicode encoding
  25. ///
  26. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
  27. template<typename CharOut, typename CharIn, class Alloc = std::allocator<CharOut>>
  28. std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
  29. utf_to_utf(const CharIn* begin, const CharIn* end, method_type how = default_method, const Alloc& alloc = Alloc())
  30. {
  31. std::basic_string<CharOut, std::char_traits<CharOut>, Alloc> result(alloc);
  32. result.reserve(end - begin);
  33. auto inserter = std::back_inserter(result);
  34. while(begin != end) {
  35. const utf::code_point c = utf::utf_traits<CharIn>::decode(begin, end);
  36. if(c == utf::illegal || c == utf::incomplete) {
  37. if(how == stop)
  38. throw conversion_error();
  39. } else
  40. utf::utf_traits<CharOut>::encode(c, inserter);
  41. }
  42. return result;
  43. }
  44. /// Convert a Unicode string \a str to other Unicode encoding.
  45. /// Invalid characters are skipped.
  46. template<typename CharOut, typename CharIn, class Alloc>
  47. std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
  48. utf_to_utf(const CharIn* begin, const CharIn* end, const Alloc& alloc)
  49. {
  50. return utf_to_utf<CharOut>(begin, end, skip, alloc);
  51. }
  52. /// Convert a Unicode NULL terminated string \a str to other Unicode encoding
  53. ///
  54. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
  55. template<typename CharOut, typename CharIn, class Alloc = std::allocator<CharOut>>
  56. std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
  57. utf_to_utf(const CharIn* str, method_type how = default_method, const Alloc& alloc = Alloc())
  58. {
  59. return utf_to_utf<CharOut>(str, util::str_end(str), how, alloc);
  60. }
  61. /// Convert a Unicode string \a str to other Unicode encoding.
  62. /// Invalid characters are skipped.
  63. template<typename CharOut, typename CharIn, class Alloc>
  64. #ifndef BOOST_LOCALE_DOXYGEN
  65. detail::enable_if_allocator_for<Alloc,
  66. CharOut,
  67. #endif
  68. std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
  69. #ifndef BOOST_LOCALE_DOXYGEN
  70. >
  71. #endif
  72. utf_to_utf(const CharIn* str, const Alloc& alloc)
  73. {
  74. return utf_to_utf<CharOut>(str, skip, alloc);
  75. }
  76. /// Convert a Unicode string \a str to other Unicode encoding
  77. ///
  78. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
  79. template<typename CharOut, typename CharIn, class Alloc>
  80. #ifndef BOOST_LOCALE_DOXYGEN
  81. detail::enable_if_allocator_for<
  82. Alloc,
  83. CharIn,
  84. #endif
  85. std::basic_string<CharOut, std::char_traits<CharOut>, detail::rebind_alloc<Alloc, CharOut>>
  86. #ifndef BOOST_LOCALE_DOXYGEN
  87. >
  88. #endif
  89. utf_to_utf(const std::basic_string<CharIn, std::char_traits<CharIn>, Alloc>& str, method_type how = default_method)
  90. {
  91. return utf_to_utf<CharOut>(str.c_str(),
  92. str.c_str() + str.size(),
  93. how,
  94. detail::rebind_alloc<Alloc, CharOut>(str.get_allocator()));
  95. }
  96. /// Convert a Unicode string \a str to other Unicode encoding
  97. ///
  98. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
  99. template<typename CharOut, typename CharIn, class AllocOut, class AllocIn>
  100. #ifndef BOOST_LOCALE_DOXYGEN
  101. detail::enable_if_allocator_for<AllocIn,
  102. CharIn,
  103. #endif
  104. std::basic_string<CharOut, std::char_traits<CharOut>, AllocOut>
  105. #ifndef BOOST_LOCALE_DOXYGEN
  106. >
  107. #endif
  108. utf_to_utf(const std::basic_string<CharIn, std::char_traits<CharIn>, AllocIn>& str,
  109. method_type how = default_method,
  110. const AllocOut& alloc = AllocOut())
  111. {
  112. return utf_to_utf<CharOut>(str.c_str(), str.c_str() + str.size(), how, alloc);
  113. }
  114. /// Convert a Unicode string \a str to other Unicode encoding.
  115. /// Invalid characters are skipped.
  116. template<typename CharOut, typename CharIn, class AllocOut, class AllocIn>
  117. #ifndef BOOST_LOCALE_DOXYGEN
  118. detail::enable_if_allocator_for2<AllocIn,
  119. CharIn,
  120. AllocOut,
  121. CharOut,
  122. #endif
  123. std::basic_string<CharOut, std::char_traits<CharOut>, AllocOut>
  124. #ifndef BOOST_LOCALE_DOXYGEN
  125. >
  126. #endif
  127. utf_to_utf(const std::basic_string<CharIn, std::char_traits<CharIn>, AllocIn>& str, const AllocOut& alloc)
  128. {
  129. return utf_to_utf<CharOut>(str, skip, alloc);
  130. }
  131. /// @}
  132. }}} // namespace boost::locale::conv
  133. #ifdef BOOST_MSVC
  134. # pragma warning(pop)
  135. #endif
  136. #endif