codecvt.hpp 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700
  1. /*
  2. * Copyright (c) 2017-2023 zhllxt
  3. *
  4. * author : zhllxt
  5. * email : 37792738@qq.com
  6. *
  7. * Distributed under the Boost Software License, Version 1.0. (See accompanying
  8. * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. * /Microsoft Visual Studio/2022/Enterprise/VC/Tools/MSVC/14.33.31629/include/codecvt
  11. *
  12. */
  13. #ifndef __ASIO2_CODECVT_HPP__
  14. #define __ASIO2_CODECVT_HPP__
  15. #if defined(_MSC_VER) && (_MSC_VER >= 1200)
  16. #pragma once
  17. #endif // defined(_MSC_VER) && (_MSC_VER >= 1200)
  18. #include <asio2/base/detail/push_options.hpp>
  19. #include <cstdlib>
  20. #include <locale>
  21. #include <codecvt>
  22. #include <asio2/external/predef.h>
  23. #include <asio2/external/asio.hpp>
  24. #include <asio2/util/string.hpp>
  25. #if defined(ASIO2_OS_WINDOWS) || defined(__CYGWIN__)
  26. # if __has_include(<Windows.h>)
  27. # include <Windows.h>
  28. # ifndef ASIO2_LOCALE_USE_WIN32_API
  29. # define ASIO2_LOCALE_USE_WIN32_API
  30. # endif
  31. # endif
  32. #endif
  33. namespace asio2
  34. {
  // Re-export the standard conversion-facet vocabulary inside this namespace.
  using codecvt_base = std::codecvt_base;

  // Alias of std::codecvt. InternT is the internal character type, ExternT the
  // external (byte stream) character type and StateT the conversion-state type.
  template <class InternT, class ExternT, class StateT>
  using codecvt = std::codecvt<InternT, ExternT, StateT>;

  // Alias of std::codecvt_byname with the same parameter meaning as above.
  template <class InternT, class ExternT, class StateT>
  using codecvt_byname = std::codecvt_byname<InternT, ExternT, StateT>;
  namespace detail
  {
      // Values kept in the first byte of the conversion state by codecvt_utf16
      // to remember which byte order is in effect (0 means "not decided yet").
      inline constexpr int _Codecvt_Little_first = 1;
      inline constexpr int _Codecvt_Big_first    = 2;

      // Shared implementation of do_length() for the facets in this file.
      //
      // Repeatedly asks _Cvt's own do_in() (non-virtually) to convert into a
      // one-element scratch buffer and counts how many input units are consumed
      // until either _Count internal characters were produced, the input is
      // exhausted, or do_in() reports something other than `ok`.
      //
      // Returns the number of units of [_First1, _Last1) that were consumed,
      // clamped to the largest value an int can hold.
      template <class _CvtTy, class _Byte, class _Statype>
      [[nodiscard]] int _Codecvt_do_length(
          const _CvtTy& _Cvt, _Statype& _State, const _Byte* _First1, const _Byte* _Last1, std::size_t _Count) {
          using _Elem = typename _CvtTy::intern_type;

          const _Byte* const origin = _First1;
          const _Byte*       cursor = _First1;
          std::size_t        wanted = _Count;

          while (wanted != 0 && cursor != _Last1) {
              const _Byte* in_next;
              _Elem*       out_next;
              _Elem        scratch;

              // convert at most one internal character
              const auto status =
                  _Cvt._CvtTy::do_in(_State, cursor, _Last1, in_next, &scratch, &scratch + 1, out_next);

              if (status == std::codecvt_base::noconv) {
                  // identity conversion: every input unit counts as one character
                  const std::size_t available = static_cast<std::size_t>(_Last1 - cursor);
                  cursor += (wanted < available ? wanted : available);
                  break;
              }
              if (status != std::codecvt_base::ok) {
                  break; // error or partial: stop counting here
              }

              if (out_next == &scratch + 1) {
                  --wanted; // a character was actually produced
              }
              cursor = in_next;
          }

          const std::ptrdiff_t consumed = cursor - origin;
          const std::ptrdiff_t ceiling  = std::ptrdiff_t{(std::numeric_limits<int>::max)()};
          return static_cast<int>(ceiling < consumed ? ceiling : consumed);
      }
  }
  // Bit flags passed as the _Mymode template argument of the facets below.
  // They may be OR-ed together; a value-initialized codecvt_mode means "no flags".
  enum codecvt_mode {
      little_endian   = 1, // codecvt_utf16: default to least-significant byte first
      generate_header = 2, // do_out: write a byte-order mark before the first character
      consume_header  = 4  // do_in: recognise and drop a leading byte-order mark
  };
  72. template <
  73. class _Elem,
  74. class CharT = char,
  75. unsigned long _Mymax = 0x10ffff,
  76. asio2::codecvt_mode _Mymode = asio2::codecvt_mode{}>
  77. class codecvt_utf8 : public std::codecvt<_Elem, CharT, std::mbstate_t> {
  78. // facet for converting between _Elem and UTF-8 byte sequences
  79. public:
  80. using _Mybase = std::codecvt<_Elem, CharT, std::mbstate_t>;
  81. using result = typename _Mybase::result;
  82. using _Byte = CharT;
  83. using intern_type = _Elem;
  84. using extern_type = _Byte;
  85. using state_type = std::mbstate_t;
  86. explicit codecvt_utf8(std::size_t _Refs = 0) : _Mybase(_Refs) {}
  87. ~codecvt_utf8() noexcept override {}
  88. protected:
  89. result do_in(std::mbstate_t& _State, const _Byte* _First1, const _Byte* _Last1, const _Byte*& _Mid1,
  90. _Elem* _First2, _Elem* _Last2, _Elem*& _Mid2) const override {
  91. // convert bytes [_First1, _Last1) to [_First2, _Last2)
  92. std::int8_t* _Pstate = reinterpret_cast<std::int8_t*>(&_State);
  93. _Mid1 = _First1;
  94. _Mid2 = _First2;
  95. while (_Mid1 != _Last1 && _Mid2 != _Last2) { // convert a multibyte sequence
  96. unsigned long _By = static_cast<std::make_unsigned_t<CharT>>(*_Mid1);
  97. unsigned long _Ch;
  98. int _Nextra;
  99. if (_By < 0x80u) {
  100. _Ch = _By;
  101. _Nextra = 0;
  102. } else if (_By < 0xc0u) { // 0x80-0xbf not first byte
  103. ++_Mid1;
  104. return _Mybase::error;
  105. } else if (_By < 0xe0u) {
  106. _Ch = _By & 0x1f;
  107. _Nextra = 1;
  108. } else if (_By < 0xf0u) {
  109. _Ch = _By & 0x0f;
  110. _Nextra = 2;
  111. } else if (_By < 0xf8u) {
  112. _Ch = _By & 0x07;
  113. _Nextra = 3;
  114. } else {
  115. _Ch = _By & 0x03;
  116. _Nextra = _By < 0xfc ? 4 : 5;
  117. }
  118. if (_Nextra == 0) {
  119. ++_Mid1;
  120. } else if (_Last1 - _Mid1 < _Nextra + 1) {
  121. break; // not enough input
  122. } else {
  123. for (++_Mid1; 0 < _Nextra; --_Nextra, ++_Mid1) {
  124. if ((_By = static_cast<std::make_unsigned_t<CharT>>(*_Mid1)) < 0x80u || 0xc0u <= _By) {
  125. return _Mybase::error; // not continuation byte
  126. } else {
  127. _Ch = _Ch << 6 | (_By & 0x3f);
  128. }
  129. }
  130. }
  131. if (*_Pstate == 0) { // first time, maybe look for and consume header
  132. *_Pstate = 1;
  133. constexpr bool _Consuming = (_Mymode & consume_header) != 0;
  134. if constexpr (_Consuming) {
  135. if (_Ch == 0xfeff) { // drop header and retry
  136. const result _Ans = do_in(_State, _Mid1, _Last1, _Mid1, _First2, _Last2, _Mid2);
  137. if (_Ans == _Mybase::partial) { // roll back header determination
  138. *_Pstate = 0;
  139. _Mid1 = _First1;
  140. }
  141. return _Ans;
  142. }
  143. }
  144. }
  145. if (_Mymax < _Ch) {
  146. return _Mybase::error; // code too large
  147. }
  148. *_Mid2++ = static_cast<_Elem>(_Ch);
  149. }
  150. return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
  151. }
  152. result do_out(std::mbstate_t& _State, const _Elem* _First1, const _Elem* _Last1, const _Elem*& _Mid1,
  153. _Byte* _First2, _Byte* _Last2, _Byte*& _Mid2) const override {
  154. // convert [_First1, _Last1) to bytes [_First2, _Last2)
  155. std::int8_t* _Pstate = reinterpret_cast<std::int8_t*>(&_State);
  156. _Mid1 = _First1;
  157. _Mid2 = _First2;
  158. while (_Mid1 != _Last1 && _Mid2 != _Last2) { // convert and put a widechar
  159. _Byte _By;
  160. int _Nextra;
  161. unsigned long _Ch = static_cast<unsigned long>(*_Mid1);
  162. if (_Mymax < _Ch) {
  163. return _Mybase::error;
  164. }
  165. if (_Ch < 0x0080u) {
  166. _By = static_cast<_Byte>(_Ch);
  167. _Nextra = 0;
  168. } else if (_Ch < 0x0800u) {
  169. _By = static_cast<_Byte>(0xc0 | _Ch >> 6);
  170. _Nextra = 1;
  171. } else if (_Ch < 0x00010000u) {
  172. _By = static_cast<_Byte>(0xe0 | _Ch >> 12);
  173. _Nextra = 2;
  174. } else if (_Ch < 0x00200000u) {
  175. _By = static_cast<_Byte>(0xf0 | _Ch >> 18);
  176. _Nextra = 3;
  177. } else if (_Ch < 0x04000000u) {
  178. _By = static_cast<_Byte>(0xf8 | _Ch >> 24);
  179. _Nextra = 4;
  180. } else {
  181. _By = static_cast<_Byte>(0xfc | (_Ch >> 30 & 0x03));
  182. _Nextra = 5;
  183. }
  184. if (*_Pstate == 0) { // first time, maybe generate header
  185. *_Pstate = 1;
  186. constexpr bool _Generating = (_Mymode & generate_header) != 0;
  187. if constexpr (_Generating) {
  188. if (_Last2 - _Mid2 < 3 + 1 + _Nextra) {
  189. return _Mybase::partial; // not enough room for both
  190. }
  191. // prepend header
  192. *_Mid2++ = '\xef';
  193. *_Mid2++ = '\xbb';
  194. *_Mid2++ = '\xbf';
  195. }
  196. }
  197. if (_Last2 - _Mid2 < 1 + _Nextra) {
  198. break; // not enough room for output
  199. }
  200. ++_Mid1;
  201. for (*_Mid2++ = _By; 0 < _Nextra;) {
  202. *_Mid2++ = static_cast<_Byte>((_Ch >> 6 * --_Nextra & 0x3f) | 0x80);
  203. }
  204. }
  205. return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
  206. }
  207. result do_unshift(std::mbstate_t&, _Byte* _First2, _Byte*, _Byte*& _Mid2) const override {
  208. // generate bytes to return to default shift state
  209. _Mid2 = _First2;
  210. return _Mybase::noconv;
  211. }
  212. friend int detail::_Codecvt_do_length<>(const codecvt_utf8&, std::mbstate_t&, const _Byte*, const _Byte*, std::size_t);
  213. int do_length(
  214. std::mbstate_t& _State, const _Byte* _First1, const _Byte* _Last1, std::size_t _Count) const noexcept override {
  215. return detail::_Codecvt_do_length(*this, _State, _First1, _Last1, _Count);
  216. }
  217. bool do_always_noconv() const noexcept override {
  218. // return true if conversions never change input
  219. return false;
  220. }
  221. int do_max_length() const noexcept override {
  222. // return maximum length required for a conversion
  223. if constexpr ((_Mymode & (consume_header | generate_header)) != 0) {
  224. return 9;
  225. } else {
  226. return 6;
  227. }
  228. }
  229. int do_encoding() const noexcept override {
  230. // return length of code sequence (from codecvt)
  231. if constexpr ((_Mymode & (consume_header | generate_header)) != 0) {
  232. return -1; // -1 => state dependent
  233. } else {
  234. return 0; // 0 => varying length
  235. }
  236. }
  237. };
  238. template <
  239. class _Elem,
  240. class CharT = char,
  241. unsigned long _Mymax = 0x10ffff,
  242. asio2::codecvt_mode _Mymode = asio2::codecvt_mode{}>
  243. class codecvt_utf16 : public std::codecvt<_Elem, CharT, std::mbstate_t> {
  244. // facet for converting between _Elem and UTF-16 multibyte sequences
  245. private:
  246. enum { _Bytes_per_word = 2 };
  247. public:
  248. using _Mybase = std::codecvt<_Elem, CharT, std::mbstate_t>;
  249. using result = typename _Mybase::result;
  250. using _Byte = CharT;
  251. using intern_type = _Elem;
  252. using extern_type = _Byte;
  253. using state_type = std::mbstate_t;
  254. explicit codecvt_utf16(std::size_t _Refs = 0) : _Mybase(_Refs) {}
  255. ~codecvt_utf16() noexcept override {}
  256. protected:
  257. result do_in(std::mbstate_t& _State, const _Byte* _First1, const _Byte* _Last1, const _Byte*& _Mid1,
  258. _Elem* _First2, _Elem* _Last2, _Elem*& _Mid2) const override {
  259. // convert bytes [_First1, _Last1) to [_First2, _Last2)
  260. std::int8_t* _Pstate = reinterpret_cast<std::int8_t*>(&_State);
  261. _Mid1 = _First1;
  262. _Mid2 = _First2;
  263. while (_Bytes_per_word <= _Last1 - _Mid1 && _Mid2 != _Last2) { // convert a multibyte sequence
  264. const auto _Ptr = reinterpret_cast<const std::make_unsigned_t<CharT>*>(_Mid1);
  265. unsigned long _Ch;
  266. unsigned short _Ch0;
  267. unsigned short _Ch1;
  268. if (*_Pstate == detail::_Codecvt_Little_first) {
  269. _Ch0 = static_cast<unsigned short>(_Ptr[1] << 8 | _Ptr[0]);
  270. } else if (*_Pstate == detail::_Codecvt_Big_first) {
  271. _Ch0 = static_cast<unsigned short>(_Ptr[0] << 8 | _Ptr[1]);
  272. } else { // no header seen yet, try preferred mode
  273. constexpr bool _Prefer_LE = (_Mymode & little_endian) != 0;
  274. constexpr std::int8_t _Default_endian = _Prefer_LE ? detail::_Codecvt_Little_first : detail::_Codecvt_Big_first;
  275. if constexpr (_Prefer_LE) {
  276. _Ch0 = static_cast<unsigned short>(_Ptr[1] << 8 | _Ptr[0]);
  277. } else {
  278. _Ch0 = static_cast<unsigned short>(_Ptr[0] << 8 | _Ptr[1]);
  279. }
  280. *_Pstate = _Default_endian;
  281. constexpr bool _Consuming = (_Mymode & consume_header) != 0;
  282. if constexpr (_Consuming) {
  283. if (_Ch0 == 0xfffeu) {
  284. *_Pstate = 3 - _Default_endian;
  285. }
  286. if (_Ch0 == 0xfffeu || _Ch0 == 0xfeffu) { // consume header, fixate on endianness, and retry
  287. _Mid1 += _Bytes_per_word;
  288. result _Ans = do_in(_State, _Mid1, _Last1, _Mid1, _First2, _Last2, _Mid2);
  289. if (_Ans == _Mybase::partial) { // not enough bytes, roll back header
  290. *_Pstate = 0;
  291. _Mid1 = _First1;
  292. }
  293. return _Ans;
  294. }
  295. }
  296. }
  297. if (_Ch0 < 0xd800u || 0xdc00u <= _Ch0) { // one word, consume bytes
  298. _Mid1 += _Bytes_per_word;
  299. _Ch = _Ch0;
  300. } else if (_Last1 - _Mid1 < 2 * _Bytes_per_word) {
  301. break;
  302. } else { // get second word
  303. if (*_Pstate == detail::_Codecvt_Little_first) {
  304. _Ch1 = static_cast<unsigned short>(_Ptr[3] << 8 | _Ptr[2]);
  305. } else {
  306. _Ch1 = static_cast<unsigned short>(_Ptr[2] << 8 | _Ptr[3]);
  307. }
  308. if (_Ch1 < 0xdc00u || 0xe000u <= _Ch1) {
  309. return _Mybase::error;
  310. }
  311. _Mid1 += 2 * _Bytes_per_word;
  312. _Ch = static_cast<unsigned long>(_Ch0 - 0xd800 + 0x0040) << 10 | (_Ch1 - 0xdc00);
  313. }
  314. if (_Mymax < _Ch) {
  315. return _Mybase::error; // code too large
  316. }
  317. *_Mid2++ = static_cast<_Elem>(_Ch);
  318. }
  319. return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
  320. }
  321. result do_out(std::mbstate_t& _State, const _Elem* _First1, const _Elem* _Last1, const _Elem*& _Mid1,
  322. _Byte* _First2, _Byte* _Last2, _Byte*& _Mid2) const override {
  323. // convert [_First1, _Last1) to bytes [_First2, _Last2)
  324. std::int8_t* _Pstate = reinterpret_cast<std::int8_t*>(&_State);
  325. _Mid1 = _First1;
  326. _Mid2 = _First2;
  327. if (*_Pstate == 0) { // determine endianness once, maybe generate header
  328. if constexpr ((_Mymode & little_endian) != 0) {
  329. *_Pstate = detail::_Codecvt_Little_first;
  330. } else {
  331. *_Pstate = detail::_Codecvt_Big_first;
  332. }
  333. constexpr bool _Generating = (_Mymode & generate_header) != 0;
  334. if constexpr (_Generating) {
  335. if (_Last2 - _Mid2 < 3 * _Bytes_per_word) {
  336. return _Mybase::partial; // not enough room for all
  337. }
  338. if (*_Pstate == detail::_Codecvt_Little_first) { // put header LS byte first
  339. *_Mid2++ = '\xff';
  340. *_Mid2++ = '\xfe';
  341. } else { // put header MS byte first
  342. *_Mid2++ = '\xfe';
  343. *_Mid2++ = '\xff';
  344. }
  345. }
  346. }
  347. while (_Mid1 != _Last1 && _Bytes_per_word <= _Last2 - _Mid2) { // convert and put a widechar
  348. bool _Extra = false;
  349. unsigned long _Ch = static_cast<unsigned long>(*_Mid1++);
  350. if ((_Mymax < 0x10ffffu ? _Mymax : 0x10ffffu) < _Ch) {
  351. return _Mybase::error; // value too large
  352. }
  353. if (_Ch <= 0xffffu) { // one word, can't be code for first of two
  354. if (0xd800u <= _Ch && _Ch < 0xdc00u) {
  355. return _Mybase::error;
  356. }
  357. } else if (_Last2 - _Mid2 < 2 * _Bytes_per_word) { // not enough room for two-word output, back up
  358. --_Mid1;
  359. return _Mybase::partial;
  360. } else {
  361. _Extra = true;
  362. }
  363. if (*_Pstate == detail::_Codecvt_Little_first) {
  364. if (_Extra) { // put a pair of words LS byte first
  365. unsigned short _Ch0 =
  366. static_cast<unsigned short>(0xd800 | (static_cast<unsigned short>(_Ch >> 10) - 0x0040));
  367. *_Mid2++ = static_cast<_Byte>(_Ch0);
  368. *_Mid2++ = static_cast<_Byte>(_Ch0 >> 8);
  369. _Ch0 = static_cast<unsigned short>(0xdc00 | (static_cast<unsigned short>(_Ch) & 0x03ff));
  370. *_Mid2++ = static_cast<_Byte>(_Ch0);
  371. *_Mid2++ = static_cast<_Byte>(_Ch0 >> 8);
  372. } else { // put a single word LS byte first
  373. *_Mid2++ = static_cast<_Byte>(_Ch);
  374. *_Mid2++ = static_cast<_Byte>(_Ch >> 8);
  375. }
  376. } else {
  377. if (_Extra) { // put a pair of words MS byte first
  378. unsigned short _Ch0 =
  379. static_cast<unsigned short>(0xd800 | (static_cast<unsigned short>(_Ch >> 10) - 0x0040));
  380. *_Mid2++ = static_cast<_Byte>(_Ch0 >> 8);
  381. *_Mid2++ = static_cast<_Byte>(_Ch0);
  382. _Ch0 = static_cast<unsigned short>(0xdc00 | (static_cast<unsigned short>(_Ch) & 0x03ff));
  383. *_Mid2++ = static_cast<_Byte>(_Ch0 >> 8);
  384. *_Mid2++ = static_cast<_Byte>(_Ch0);
  385. } else { // put a single word MS byte first
  386. *_Mid2++ = static_cast<_Byte>(_Ch >> 8);
  387. *_Mid2++ = static_cast<_Byte>(_Ch);
  388. }
  389. }
  390. }
  391. return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
  392. }
  393. result do_unshift(std::mbstate_t&, _Byte* _First2, _Byte*, _Byte*& _Mid2) const override {
  394. // generate bytes to return to default shift state
  395. _Mid2 = _First2;
  396. return _Mybase::noconv;
  397. }
  398. friend int detail::_Codecvt_do_length<>(const codecvt_utf16&, std::mbstate_t&, const _Byte*, const _Byte*, std::size_t);
  399. int do_length(
  400. std::mbstate_t& _State, const _Byte* _First1, const _Byte* _Last1, std::size_t _Count) const noexcept override {
  401. return detail::_Codecvt_do_length(*this, _State, _First1, _Last1, _Count);
  402. }
  403. bool do_always_noconv() const noexcept override {
  404. // return true if conversions never change input
  405. return false;
  406. }
  407. int do_max_length() const noexcept override {
  408. // return maximum length required for a conversion
  409. if constexpr ((_Mymode & (consume_header | generate_header)) != 0) {
  410. return 3 * _Bytes_per_word;
  411. } else {
  412. return 6 * _Bytes_per_word;
  413. }
  414. }
  415. int do_encoding() const noexcept override {
  416. // return length of code sequence (from codecvt)
  417. if constexpr ((_Mymode & (consume_header | generate_header)) != 0) {
  418. return -1; // -1 => state dependent
  419. } else {
  420. return 0; // 0 => varying length
  421. }
  422. }
  423. };
  424. template <
  425. class _Elem,
  426. class CharT = char,
  427. unsigned long _Mymax = 0x10ffff,
  428. asio2::codecvt_mode _Mymode = asio2::codecvt_mode{}>
  429. class codecvt_utf8_utf16
  430. : public std::codecvt<_Elem, CharT, std::mbstate_t> { // facet for converting between UTF-16 _Elem and UTF-8 byte sequences
  431. public:
  432. using _Mybase = std::codecvt<_Elem, CharT, std::mbstate_t>;
  433. using result = typename _Mybase::result;
  434. using _Byte = CharT;
  435. using intern_type = _Elem;
  436. using extern_type = _Byte;
  437. using state_type = std::mbstate_t;
  438. static_assert(sizeof(unsigned short) <= sizeof(state_type), "state_type too small");
  439. explicit codecvt_utf8_utf16(std::size_t _Refs = 0) : _Mybase(_Refs) {}
  440. ~codecvt_utf8_utf16() noexcept override {}
  441. protected:
  442. result do_in(std::mbstate_t& _State, const _Byte* _First1, const _Byte* _Last1, const _Byte*& _Mid1,
  443. _Elem* _First2, _Elem* _Last2, _Elem*& _Mid2) const override {
  444. // convert bytes [_First1, _Last1) to [_First2, _Last2)
  445. unsigned short* _Pstate = reinterpret_cast<unsigned short*>(&_State);
  446. _Mid1 = _First1;
  447. _Mid2 = _First2;
  448. while (_Mid1 != _Last1 && _Mid2 != _Last2) { // convert a multibyte sequence
  449. unsigned long _By = static_cast<std::make_unsigned_t<CharT>>(*_Mid1);
  450. unsigned long _Ch;
  451. int _Nextra;
  452. int _Nskip;
  453. if (*_Pstate > 1u) {
  454. if (_By < 0x80u || 0xc0u <= _By) {
  455. return _Mybase::error; // not continuation byte
  456. }
  457. // deliver second half of two-word value
  458. ++_Mid1;
  459. *_Mid2++ = static_cast<_Elem>(*_Pstate | (_By & 0x3f));
  460. *_Pstate = 1;
  461. continue;
  462. }
  463. if (_By < 0x80u) {
  464. _Ch = _By;
  465. _Nextra = 0;
  466. } else if (_By < 0xc0u) { // 0x80-0xbf not first byte
  467. ++_Mid1;
  468. return _Mybase::error;
  469. } else if (_By < 0xe0u) {
  470. _Ch = _By & 0x1f;
  471. _Nextra = 1;
  472. } else if (_By < 0xf0u) {
  473. _Ch = _By & 0x0f;
  474. _Nextra = 2;
  475. } else if (_By < 0xf8u) {
  476. _Ch = _By & 0x07;
  477. _Nextra = 3;
  478. } else {
  479. _Ch = _By & 0x03;
  480. _Nextra = _By < 0xfc ? 4 : 5;
  481. }
  482. _Nskip = _Nextra < 3 ? 0 : 1; // leave a byte for 2nd word
  483. _First1 = _Mid1; // roll back point
  484. if (_Nextra == 0) {
  485. ++_Mid1;
  486. } else if (_Last1 - _Mid1 < _Nextra + 1 - _Nskip) {
  487. break; // not enough input
  488. } else {
  489. for (++_Mid1; _Nskip < _Nextra; --_Nextra, ++_Mid1) {
  490. if ((_By = static_cast<std::make_unsigned_t<CharT>>(*_Mid1)) < 0x80u || 0xc0u <= _By) {
  491. return _Mybase::error; // not continuation byte
  492. }
  493. _Ch = _Ch << 6 | (_By & 0x3f);
  494. }
  495. }
  496. if (0 < _Nskip) {
  497. _Ch <<= 6; // get last byte on next call
  498. }
  499. if ((_Mymax < 0x10ffffu ? _Mymax : 0x10ffffu) < _Ch) {
  500. return _Mybase::error; // value too large
  501. }
  502. if (0xffffu < _Ch) { // deliver first half of two-word value, save second word
  503. unsigned short _Ch0 = static_cast<unsigned short>(0xd800 | ((_Ch >> 10) - 0x0040));
  504. *_Mid2++ = static_cast<_Elem>(_Ch0);
  505. *_Pstate = static_cast<unsigned short>(0xdc00 | (_Ch & 0x03ff));
  506. continue;
  507. }
  508. if (_Nskip != 0) {
  509. if (_Mid1 == _Last1) { // not enough bytes, noncanonical value
  510. _Mid1 = _First1;
  511. break;
  512. }
  513. if ((_By = static_cast<std::make_unsigned_t<CharT>>(*_Mid1++)) < 0x80u || 0xc0u <= _By) {
  514. return _Mybase::error; // not continuation byte
  515. }
  516. _Ch |= _By & 0x3f; // complete noncanonical value
  517. }
  518. if (*_Pstate == 0u) { // first time, maybe look for and consume header
  519. *_Pstate = 1;
  520. constexpr bool _Consuming = (_Mymode & consume_header) != 0;
  521. if constexpr (_Consuming) {
  522. if (_Ch == 0xfeffu) { // drop header and retry
  523. result _Ans = do_in(_State, _Mid1, _Last1, _Mid1, _First2, _Last2, _Mid2);
  524. if (_Ans == _Mybase::partial) { // roll back header determination
  525. *_Pstate = 0;
  526. _Mid1 = _First1;
  527. }
  528. return _Ans;
  529. }
  530. }
  531. }
  532. *_Mid2++ = static_cast<_Elem>(_Ch);
  533. }
  534. return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
  535. }
  536. result do_out(std::mbstate_t& _State, const _Elem* _First1, const _Elem* _Last1, const _Elem*& _Mid1,
  537. _Byte* _First2, _Byte* _Last2, _Byte*& _Mid2) const override {
  538. // convert [_First1, _Last1) to bytes [_First2, _Last2)
  539. unsigned short* _Pstate = reinterpret_cast<unsigned short*>(&_State);
  540. _Mid1 = _First1;
  541. _Mid2 = _First2;
  542. while (_Mid1 != _Last1 && _Mid2 != _Last2) { // convert and put a widechar
  543. unsigned long _Ch;
  544. unsigned short _Ch1 = static_cast<unsigned short>(*_Mid1);
  545. bool _Save = false;
  546. if (1u < *_Pstate) { // get saved MS 11 bits from *_Pstate
  547. if (_Ch1 < 0xdc00u || 0xe000u <= _Ch1) {
  548. return _Mybase::error; // bad second word
  549. }
  550. _Ch = static_cast<unsigned long>((*_Pstate << 10) | (_Ch1 - 0xdc00));
  551. } else if (0xd800u <= _Ch1 && _Ch1 < 0xdc00u) { // get new first word
  552. _Ch = static_cast<unsigned long>((_Ch1 - 0xd800 + 0x0040) << 10);
  553. _Save = true; // put only first byte, rest with second word
  554. } else {
  555. _Ch = _Ch1; // not first word, just put it
  556. }
  557. _Byte _By;
  558. int _Nextra;
  559. if (_Ch < 0x0080u) {
  560. _By = static_cast<_Byte>(_Ch);
  561. _Nextra = 0;
  562. } else if (_Ch < 0x0800u) {
  563. _By = static_cast<_Byte>(0xc0 | _Ch >> 6);
  564. _Nextra = 1;
  565. } else if (_Ch < 0x10000u) {
  566. _By = static_cast<_Byte>(0xe0 | _Ch >> 12);
  567. _Nextra = 2;
  568. } else {
  569. _By = static_cast<_Byte>(0xf0 | _Ch >> 18);
  570. _Nextra = 3;
  571. }
  572. int _Nput = _Nextra < 3 ? _Nextra + 1 : _Save ? 1 : 3;
  573. if (_Last2 - _Mid2 < _Nput) {
  574. break; // not enough room, even without header
  575. }
  576. if constexpr ((_Mymode & generate_header) != 0) { // header to put
  577. if (*_Pstate == 0u) {
  578. if (_Last2 - _Mid2 < 3 + _Nput) {
  579. break; // not enough room for header + output
  580. }
  581. // prepend header
  582. *_Mid2++ = '\xef';
  583. *_Mid2++ = '\xbb';
  584. *_Mid2++ = '\xbf';
  585. }
  586. }
  587. ++_Mid1;
  588. if (_Save || _Nextra < 3) { // put first byte of sequence, if not already put
  589. *_Mid2++ = _By;
  590. --_Nput;
  591. }
  592. for (; 0 < _Nput; --_Nput) {
  593. *_Mid2++ = static_cast<_Byte>((_Ch >> 6 * --_Nextra & 0x3f) | 0x80);
  594. }
  595. *_Pstate = static_cast<unsigned short>(_Save ? _Ch >> 10 : 1);
  596. }
  597. return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
  598. }
  599. result do_unshift(std::mbstate_t&, _Byte* _First2, _Byte*, _Byte*& _Mid2) const override {
  600. // generate bytes to return to default shift state
  601. _Mid2 = _First2;
  602. return _Mybase::noconv;
  603. }
  604. friend int detail::_Codecvt_do_length<>(const codecvt_utf8_utf16&, std::mbstate_t&, const _Byte*, const _Byte*, std::size_t);
  605. int do_length(
  606. std::mbstate_t& _State, const _Byte* _First1, const _Byte* _Last1, std::size_t _Count) const noexcept override {
  607. return detail::_Codecvt_do_length(*this, _State, _First1, _Last1, _Count);
  608. }
  609. bool do_always_noconv() const noexcept override {
  610. // return true if conversions never change input
  611. return false;
  612. }
  613. int do_max_length() const noexcept override {
  614. // return maximum length required for a conversion
  615. if constexpr ((_Mymode & consume_header) != 0) {
  616. return 9; // header + max input
  617. } else if constexpr ((_Mymode & generate_header) != 0) {
  618. return 7; // header + max output
  619. } else {
  620. return 6; // 6-byte max input sequence, no 3-byte header
  621. }
  622. }
  623. int do_encoding() const noexcept override {
  624. // return length of code sequence (from codecvt)
  625. return 0; // 0 => varying length
  626. }
  627. };
  628. template <
  629. class _Codecvt,
  630. class _Elem = wchar_t,
  631. class CharT = char,
  632. class _Traits = std::char_traits<_Elem>>
  633. class wbuffer_convert
  634. : public std::basic_streambuf<_Elem, _Traits> { // stream buffer associated with a codecvt facet
  635. private:
  636. enum _Mode { _Unused, _Wrote, _Need, _Got, _Eof };
  637. enum { _STRING_INC = 8 };
  638. public:
  639. using _Mysb = std::basic_streambuf<CharT>;
  640. using _Byte_traits = std::char_traits<CharT>;
  641. using int_type = typename _Traits::int_type;
  642. using pos_type = typename _Traits::pos_type;
  643. using off_type = typename _Traits::off_type;
  644. using state_type = typename _Codecvt::state_type;
  645. wbuffer_convert() : _State(), _Pcvt(new _Codecvt), _Mystrbuf(nullptr), _Status(_Unused), _Nback(0) {
  646. // construct without buffer pointer
  647. _Loc = std::locale(_Loc, const_cast<_Codecvt*>(_Pcvt));
  648. }
  649. explicit wbuffer_convert(_Mysb* _Strbuf)
  650. : _State(), _Pcvt(new _Codecvt), _Mystrbuf(_Strbuf), _Status(_Unused), _Nback(0) {
  651. // construct with byte stream buffer pointer
  652. _Loc = std::locale(_Loc, const_cast<_Codecvt*>(_Pcvt));
  653. }
  654. wbuffer_convert(_Mysb* _Strbuf, const _Codecvt* _Pcvt_arg)
  655. : _State(), _Pcvt(_Pcvt_arg), _Mystrbuf(_Strbuf), _Status(_Unused), _Nback(0) {
  656. // construct with byte stream buffer pointer and codecvt
  657. _Loc = std::locale(_Loc, const_cast<_Codecvt*>(_Pcvt));
  658. }
  659. wbuffer_convert(_Mysb* _Strbuf, const _Codecvt* _Pcvt_arg, state_type _State_arg)
  660. : _State(_State_arg), _Pcvt(_Pcvt_arg), _Mystrbuf(_Strbuf), _Status(_Unused), _Nback(0) {
  661. // construct with byte stream buffer pointer, codecvt, and state
  662. _Loc = std::locale(_Loc, const_cast<_Codecvt*>(_Pcvt));
  663. }
  664. ~wbuffer_convert() noexcept override {
  665. while (_Status == _Wrote) { // put any trailing homing shift
  666. if (_Str.size() < _STRING_INC) {
  667. _Str.assign(_STRING_INC, '\0');
  668. }
  669. CharT* _Buf = &_Str[0];
  670. CharT* _Dest;
  671. switch (_Pcvt->unshift(_State, _Buf, _Buf + _Str.size(), _Dest)) { // test result of homing conversion
  672. case _Codecvt::ok:
  673. _Status = _Unused; // homed successfully
  674. case _Codecvt::partial: // fall through
  675. { // put any generated bytes
  676. ptrdiff_t _Count = _Dest - _Buf;
  677. if (0 < _Count
  678. && _Byte_traits::eq_int_type(
  679. _Byte_traits::eof(), static_cast<typename _Byte_traits::int_type>(_Mystrbuf->sputn(_Buf, _Count)))) {
  680. return; // write failed
  681. }
  682. if (_Status == _Wrote && _Count == 0) {
  683. _Str.append(_STRING_INC, '\0'); // try with more space
  684. }
  685. break;
  686. }
  687. case _Codecvt::noconv:
  688. return; // nothing to do
  689. default:
  690. return; // conversion failed
  691. }
  692. }
  693. }
  694. [[nodiscard]] _Mysb* rdbuf() const {
  695. return _Mystrbuf;
  696. }
  697. _Mysb* rdbuf(_Mysb* _Strbuf) { // set byte stream buffer pointer
  698. _Mysb* _Oldstrbuf = _Mystrbuf;
  699. _Mystrbuf = _Strbuf;
  700. return _Oldstrbuf;
  701. }
  702. [[nodiscard]] state_type state() const {
  703. return _State;
  704. }
  705. wbuffer_convert(const wbuffer_convert&) = delete;
  706. wbuffer_convert& operator=(const wbuffer_convert&) = delete;
  707. protected:
  708. int_type overflow(int_type _Meta = _Traits::eof()) override { // put an element to stream
  709. if (_Traits::eq_int_type(_Traits::eof(), _Meta)) {
  710. return _Traits::not_eof(_Meta); // EOF, return success code
  711. } else if (!_Mystrbuf || 0 < _Nback || (_Status != _Unused && _Status != _Wrote)) {
  712. return _Traits::eof(); // no buffer or reading, fail
  713. } else { // put using codecvt facet
  714. const _Elem _Ch = _Traits::to_char_type(_Meta);
  715. if (_Str.size() < _STRING_INC) {
  716. _Str.assign(_STRING_INC, '\0');
  717. }
  718. for (_Status = _Wrote;;) {
  719. CharT* _Buf = &_Str[0];
  720. const _Elem* _Src;
  721. CharT* _Dest;
  722. // test result of converting one element
  723. switch (_Pcvt->out(_State, &_Ch, &_Ch + 1, _Src, _Buf, _Buf + _Str.size(), _Dest)) {
  724. case _Codecvt::partial:
  725. case _Codecvt::ok:
  726. { // converted something, try to put it out
  727. ptrdiff_t _Count = _Dest - _Buf;
  728. if (0 < _Count
  729. && _Byte_traits::eq_int_type(_Byte_traits::eof(),
  730. static_cast<typename _Byte_traits::int_type>(_Mystrbuf->sputn(_Buf, _Count)))) {
  731. return _Traits::eof(); // write failed
  732. }
  733. if (_Src != &_Ch) {
  734. return _Meta; // converted whole element
  735. }
  736. if (0 >= _Count) {
  737. if (_Str.size() >= 4 * _STRING_INC) {
  738. return _Traits::eof(); // conversion failed
  739. }
  740. _Str.append(_STRING_INC, '\0'); // try with more space
  741. }
  742. break;
  743. }
  744. case _Codecvt::noconv:
  745. if (_Traits::eq_int_type(
  746. _Traits::eof(), static_cast<int_type>(_Mystrbuf->sputn(reinterpret_cast<const char*>(&_Ch),
  747. static_cast<std::streamsize>(sizeof(_Elem)))))) {
  748. return _Traits::eof();
  749. }
  750. return _Meta; // put native byte order
  751. default:
  752. return _Traits::eof(); // conversion failed
  753. }
  754. }
  755. }
  756. }
  757. int_type pbackfail(int_type _Meta = _Traits::eof()) override { // put an element back to stream
  758. if (sizeof(_Myback) / sizeof(_Myback[0]) <= _Nback || _Status == _Wrote) {
  759. return _Traits::eof(); // nowhere to put back
  760. } else { // enough room, put it back
  761. if (!_Traits::eq_int_type(_Traits::eof(), _Meta)) {
  762. _Myback[_Nback] = _Traits::to_char_type(_Meta);
  763. }
  764. ++_Nback;
  765. if (_Status == _Unused) {
  766. _Status = _Got;
  767. }
  768. return _Meta;
  769. }
  770. }
  771. int_type underflow() override { // get an element from stream, but don't point past it
  772. int_type _Meta;
  773. if (0 >= _Nback) {
  774. if (_Traits::eq_int_type(_Traits::eof(), _Meta = _Get_elem())) {
  775. return _Meta; // _Get_elem failed, return EOF
  776. }
  777. _Myback[_Nback++] = _Traits::to_char_type(_Meta);
  778. }
  779. return _Traits::to_int_type(_Myback[_Nback - 1]);
  780. }
  781. int_type uflow() override { // get an element from stream, point past it
  782. int_type _Meta;
  783. if (0 >= _Nback) {
  784. if (_Traits::eq_int_type(_Traits::eof(), _Meta = _Get_elem())) {
  785. return _Meta; // _Get_elem failed, return EOF
  786. }
  787. _Myback[_Nback++] = _Traits::to_char_type(_Meta);
  788. }
  789. return _Traits::to_int_type(_Myback[--_Nback]);
  790. }
  791. pos_type seekoff(off_type, std::ios_base::seekdir,
  792. std::ios_base::openmode = static_cast<std::ios_base::openmode>(std::ios_base::in | std::ios_base::out)) override {
  793. return pos_type(-1); // always fail
  794. }
  795. pos_type seekpos(
  796. pos_type, std::ios_base::openmode = static_cast<std::ios_base::openmode>(std::ios_base::in | std::ios_base::out)) override {
  797. return pos_type(-1); // always fail
  798. }
  799. private:
  800. int_type _Get_elem() { // compose an element from byte stream buffer
  801. if (_Mystrbuf && _Status != _Wrote) { // got buffer, haven't written, try to compose an element
  802. if (_Status != _Eof) {
  803. if (_Str.empty()) {
  804. _Status = _Need;
  805. } else {
  806. _Status = _Got;
  807. }
  808. }
  809. while (_Status != _Eof) { // get using codecvt facet
  810. CharT* _Buf = &_Str[0];
  811. _Elem _Ch;
  812. _Elem* _Dest;
  813. const CharT* _Src;
  814. int _Meta;
  815. if (_Status == _Need) {
  816. if (_Byte_traits::eq_int_type(_Byte_traits::eof(), _Meta = _Mystrbuf->sbumpc())) {
  817. _Status = _Eof;
  818. } else {
  819. _Str.push_back(_Byte_traits::to_char_type(_Meta));
  820. }
  821. }
  822. // test result of converting one element
  823. switch (_Pcvt->in(_State, _Buf, _Buf + _Str.size(), _Src, &_Ch, &_Ch + 1, _Dest)) {
  824. case _Codecvt::partial:
  825. case _Codecvt::ok:
  826. _Str.erase(0, static_cast<std::size_t>(_Src - _Buf)); // discard any used input
  827. if (_Dest != &_Ch) {
  828. return _Traits::to_int_type(_Ch);
  829. }
  830. break;
  831. case _Codecvt::noconv:
  832. if (_Str.size() < sizeof(_Elem)) {
  833. break; // no conversion, but need more chars
  834. }
  835. std::memcpy(&_Ch, _Buf, sizeof(_Elem)); // copy raw bytes to element
  836. _Str.erase(0, sizeof(_Elem));
  837. return _Traits::to_int_type(_Ch); // return result
  838. default:
  839. _Status = _Eof; // conversion failed
  840. break;
  841. }
  842. }
  843. }
  844. return _Traits::eof();
  845. }
  846. state_type _State; // code conversion state
  847. const _Codecvt* _Pcvt; // the codecvt facet
  848. _Mysb* _Mystrbuf; // pointer to stream buffer
  849. _Mode _Status; // buffer read/write status
  850. std::size_t _Nback; // number of elements in putback buffer
  851. _Elem _Myback[8]; // putback buffer
  852. std::basic_string<CharT> _Str; // unconsumed input bytes
  853. std::locale _Loc; // manages reference to codecvt facet
  854. };
  855. template <
  856. class _Codecvt,
  857. class _Elem = wchar_t,
  858. class CharT = char,
  859. class _Walloc = std::allocator<_Elem>,
  860. class _Balloc = std::allocator<CharT>>
  861. class wstring_convert { // converts between _Elem (wide) and char (byte) strings
  862. private:
  863. enum { _BUF_INC = 8, _BUF_MAX = 16 };
  864. void _Init(const _Codecvt* _Pcvt_arg = new _Codecvt) { // initialize the object
  865. _State = state_type{};
  866. _Pcvt = _Pcvt_arg;
  867. _Loc = std::locale(_Loc, const_cast<_Codecvt*>(_Pcvt));
  868. _Nconv = 0;
  869. }
  870. public:
  871. using byte_string = std::basic_string<CharT, std::char_traits<CharT>, _Balloc>;
  872. using wide_string = std::basic_string<_Elem, std::char_traits<_Elem>, _Walloc>;
  873. using state_type = typename _Codecvt::state_type;
  874. using int_type = typename wide_string::traits_type::int_type;
  875. wstring_convert() : _Has_state(false), _Has_berr(false), _Has_werr(false) { // construct with no error strings
  876. _Init();
  877. }
  878. explicit wstring_convert(const _Codecvt* _Pcvt_arg)
  879. : _Has_state(false), _Has_berr(false), _Has_werr(false) { // construct with no error strings and codecvt
  880. _Init(_Pcvt_arg);
  881. }
  882. wstring_convert(const _Codecvt* _Pcvt_arg, state_type _State_arg)
  883. : _Has_state(true), _Has_berr(false), _Has_werr(false) { // construct with no error strings, codecvt, and state
  884. _Init(_Pcvt_arg);
  885. _State = _State_arg;
  886. }
  887. explicit wstring_convert(const byte_string& _Berr_arg)
  888. : _Berr(_Berr_arg), _Has_state(false), _Has_berr(true), _Has_werr(false) { // construct with byte error string
  889. _Init();
  890. }
  891. wstring_convert(const byte_string& _Berr_arg, const wide_string& _Werr_arg)
  892. : _Berr(_Berr_arg), _Werr(_Werr_arg), _Has_state(false), _Has_berr(true),
  893. _Has_werr(true) { // construct with byte and wide error strings
  894. _Init();
  895. }
  896. virtual ~wstring_convert() noexcept {}
  897. [[nodiscard]] std::size_t converted() const noexcept { // get conversion count
  898. return _Nconv;
  899. }
  900. [[nodiscard]] state_type state() const {
  901. return _State;
  902. }
  903. [[nodiscard]] wide_string from_bytes(CharT _Byte) { // convert a byte to a wide string
  904. return from_bytes(&_Byte, &_Byte + 1);
  905. }
  906. [[nodiscard]] wide_string from_bytes(const CharT* _Ptr) { // convert a NTBS to a wide string
  907. return from_bytes(_Ptr, _Ptr + std::strlen(_Ptr));
  908. }
  909. [[nodiscard]] wide_string from_bytes(const byte_string& _Bstr) { // convert a byte string to a wide string
  910. const CharT* _Ptr = _Bstr.c_str();
  911. return from_bytes(_Ptr, _Ptr + _Bstr.size());
  912. }
  913. [[nodiscard]] wide_string from_bytes(
  914. const CharT* _First, const CharT* _Last) { // convert byte sequence [_First, _Last) to a wide string
  915. wide_string _Wbuf;
  916. wide_string _Wstr;
  917. const CharT* _First_sav = _First;
  918. if (!_Has_state) {
  919. _State = state_type{}; // reset state if not remembered
  920. }
  921. _Wbuf.append(_BUF_INC, _Elem{});
  922. for (_Nconv = 0; _First != _Last; _Nconv = static_cast<std::size_t>(_First - _First_sav)) {
  923. // convert one or more bytes
  924. _Elem* _Dest = &_Wbuf[0];
  925. _Elem* _Dnext;
  926. // test result of converting one or more bytes
  927. switch (_Pcvt->in(_State, _First, _Last, _First, _Dest, _Dest + _Wbuf.size(), _Dnext)) {
  928. case _Codecvt::partial:
  929. case _Codecvt::ok:
  930. if (_Dest < _Dnext) {
  931. _Wstr.append(_Dest, static_cast<std::size_t>(_Dnext - _Dest));
  932. } else if (_Wbuf.size() < _BUF_MAX) {
  933. _Wbuf.append(_BUF_INC, _Elem{});
  934. } else if (_Has_werr) {
  935. return _Werr;
  936. } else {
  937. throw std::range_error("bad conversion");
  938. }
  939. break;
  940. case _Codecvt::noconv:
  941. for (; _First != _Last; ++_First) {
  942. _Wstr.push_back(static_cast<_Elem>(static_cast<std::make_unsigned_t<CharT>>(*_First)));
  943. }
  944. break; // no conversion, just copy code values
  945. default:
  946. if (_Has_werr) {
  947. return _Werr;
  948. } else {
  949. throw std::range_error("bad conversion");
  950. }
  951. }
  952. }
  953. return _Wstr;
  954. }
  955. [[nodiscard]] byte_string to_bytes(_Elem _Char) { // convert a widechar to a byte string
  956. return to_bytes(&_Char, &_Char + 1);
  957. }
  958. [[nodiscard]] byte_string to_bytes(const _Elem* _Wptr) { // convert a NTWCS to a byte string
  959. const _Elem* _Next = _Wptr;
  960. while (*_Next != 0) {
  961. ++_Next;
  962. }
  963. return to_bytes(_Wptr, _Next);
  964. }
  965. [[nodiscard]] byte_string to_bytes(const wide_string& _Wstr) { // convert a wide string to a byte string
  966. const _Elem* _Wptr = _Wstr.c_str();
  967. return to_bytes(_Wptr, _Wptr + _Wstr.size());
  968. }
  969. [[nodiscard]] byte_string to_bytes(
  970. const _Elem* _First, const _Elem* _Last) { // convert wide sequence [_First, _Last) to a byte string
  971. byte_string _Bbuf;
  972. byte_string _Bstr;
  973. const _Elem* _First_sav = _First;
  974. if (!_Has_state) {
  975. _State = state_type{}; // reset state if not remembered
  976. }
  977. _Bbuf.append(_BUF_INC, '\0');
  978. for (_Nconv = 0; _First != _Last; _Nconv = static_cast<std::size_t>(_First - _First_sav)) {
  979. // convert one or more wide chars
  980. CharT* _Dest = &_Bbuf[0];
  981. CharT* _Dnext;
  982. // test result of converting one or more wide chars
  983. switch (_Pcvt->out(_State, _First, _Last, _First, _Dest, _Dest + _Bbuf.size(), _Dnext)) {
  984. case _Codecvt::partial:
  985. case _Codecvt::ok:
  986. if (_Dest < _Dnext) {
  987. _Bstr.append(_Dest, static_cast<std::size_t>(_Dnext - _Dest));
  988. } else if (_Bbuf.size() < _BUF_MAX) {
  989. _Bbuf.append(_BUF_INC, '\0');
  990. } else if (_Has_berr) {
  991. return _Berr;
  992. } else {
  993. throw std::range_error("bad conversion");
  994. }
  995. break;
  996. case _Codecvt::noconv:
  997. for (; _First != _Last; ++_First) {
  998. _Bstr.push_back(static_cast<CharT>(static_cast<int_type>(*_First)));
  999. }
  1000. break; // no conversion, just copy code values
  1001. default:
  1002. if (_Has_berr) {
  1003. return _Berr;
  1004. } else {
  1005. throw std::range_error("bad conversion");
  1006. }
  1007. }
  1008. }
  1009. return _Bstr;
  1010. }
  1011. wstring_convert(const wstring_convert&) = delete;
  1012. wstring_convert& operator=(const wstring_convert&) = delete;
  1013. private:
  1014. const _Codecvt* _Pcvt; // the codecvt facet
  1015. std::locale _Loc; // manages reference to codecvt facet
  1016. byte_string _Berr;
  1017. wide_string _Werr;
  1018. state_type _State; // the remembered state
  1019. bool _Has_state;
  1020. bool _Has_berr;
  1021. bool _Has_werr;
  1022. std::size_t _Nconv;
  1023. };
  1024. }
  1025. namespace asio2
  1026. {
  1027. /**
  1028. * @brief Return default system locale name in POSIX format.
  1029. *
  1030. * This function tries to detect the locale using, LC_CTYPE, LC_ALL and LANG environment
  1031. * variables in this order and if all of them unset, in POSIX platforms it returns "C"
  1032. *
  1033. * On Windows additionally to check the above environment variables, this function
  1034. * tries to creates locale name from ISO-339 and ISO-3199 country codes defined
  1035. * for user default locale.
  1036. * If use_utf8_on_windows is true it sets the encoding to UTF-8, otherwise, if system
  1037. * locale supports ANSI code-page it defines the ANSI encoding like windows-1252, otherwise it fall-backs
  1038. * to UTF-8 encoding if ANSI code-page is not available.
  1039. *
  1040. * /boost/libs/locale/src/boost/locale/util/default_locale.cpp
  1041. */
  1042. inline std::string get_system_locale(bool use_utf8_on_windows = false)
  1043. {
  1044. char const *lang = 0;
  1045. if(!lang || !*lang)
  1046. lang = std::getenv("LC_CTYPE");
  1047. if(!lang || !*lang)
  1048. lang = std::getenv("LC_ALL");
  1049. if(!lang || !*lang)
  1050. lang = std::getenv("LANG");
  1051. #if !defined(BOOST_LOCALE_USE_WIN32_API) && !defined(BHO_LOCALE_USE_WIN32_API) && !defined(ASIO2_LOCALE_USE_WIN32_API)
  1052. (void)use_utf8_on_windows; // not relevant for non-windows
  1053. if(!lang || !*lang)
  1054. lang = "C";
  1055. return lang;
  1056. #else
  1057. if(lang && *lang) {
  1058. return lang;
  1059. }
  1060. char buf[10] = { 0 };
  1061. if(GetLocaleInfoA(LOCALE_USER_DEFAULT,LOCALE_SISO639LANGNAME,buf,sizeof(buf))==0)
  1062. return "C";
  1063. std::string lc_name = buf;
  1064. if(GetLocaleInfoA(LOCALE_USER_DEFAULT,LOCALE_SISO3166CTRYNAME,buf,sizeof(buf))!=0) {
  1065. lc_name += "_";
  1066. lc_name += buf;
  1067. }
  1068. if(!use_utf8_on_windows) {
  1069. if(GetLocaleInfoA(LOCALE_USER_DEFAULT,LOCALE_IDEFAULTANSICODEPAGE,buf,sizeof(buf))!=0) {
  1070. if(std::atoi(buf)==0)
  1071. lc_name+=".UTF-8";
  1072. else {
  1073. lc_name +=".windows-";
  1074. lc_name +=buf;
  1075. }
  1076. }
  1077. else {
  1078. lc_name += "UTF-8";
  1079. }
  1080. }
  1081. else {
  1082. lc_name += ".UTF-8";
  1083. }
  1084. return lc_name;
  1085. #endif
  1086. }
  1087. /**
  1088. * @brief Return default system locale name that can be used in codecvt.
  1089. *
  1090. */
  1091. inline std::string get_codecvt_locale(bool use_utf8_on_windows = false)
  1092. {
  1093. char const *lang = 0;
  1094. if(!lang || !*lang)
  1095. lang = std::getenv("LC_CTYPE");
  1096. if(!lang || !*lang)
  1097. lang = std::getenv("LC_ALL");
  1098. if(!lang || !*lang)
  1099. lang = std::getenv("LANG");
  1100. #if !defined(BOOST_LOCALE_USE_WIN32_API) && !defined(BHO_LOCALE_USE_WIN32_API) && !defined(ASIO2_LOCALE_USE_WIN32_API)
  1101. (void)use_utf8_on_windows; // not relevant for non-windows
  1102. if(!lang || !*lang)
  1103. lang = "C";
  1104. return lang;
  1105. #else
  1106. if(lang && *lang) {
  1107. return lang;
  1108. }
  1109. char buf[10] = { 0 };
  1110. std::string lc_name;
  1111. if(!use_utf8_on_windows) {
  1112. if(GetLocaleInfoA(LOCALE_USER_DEFAULT,LOCALE_IDEFAULTANSICODEPAGE,buf,sizeof(buf))!=0) {
  1113. if(std::atoi(buf)==0)
  1114. lc_name+=".UTF-8";
  1115. else {
  1116. lc_name +=".";
  1117. lc_name +=buf;
  1118. }
  1119. }
  1120. else {
  1121. lc_name += "UTF-8";
  1122. }
  1123. }
  1124. else {
  1125. lc_name += ".UTF-8";
  1126. }
  1127. return lc_name;
  1128. #endif
  1129. }
  1130. /**
  1131. * @brief Converts gbk characters to utf8 characters.
  1132. * @param str - gbk characters
  1133. * @return Converted value as std::string.
  1134. */
  1135. template<class StringT>
  1136. inline auto gbk_to_utf8(const StringT& str, const std::string& locale_name = "chs") noexcept
  1137. {
  1138. using CharT = typename detail::char_type<StringT>::type;
  1139. clear_last_error();
  1140. std::wstring w;
  1141. std::codecvt_byname<wchar_t, CharT, std::mbstate_t>* c = nullptr;
  1142. try
  1143. {
  1144. c = new std::codecvt_byname<wchar_t, CharT, std::mbstate_t>(locale_name);
  1145. }
  1146. catch (const std::exception&)
  1147. {
  1148. set_last_error(std::errc::invalid_argument);
  1149. return std::basic_string<CharT>{};
  1150. }
  1151. // gbk to widechar
  1152. {
  1153. auto sv = asio2::to_basic_string_view(str);
  1154. asio2::wstring_convert<std::codecvt_byname<wchar_t, CharT, std::mbstate_t>, wchar_t, CharT> conv(c);
  1155. try
  1156. {
  1157. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1158. }
  1159. catch (const std::range_error&)
  1160. {
  1161. set_last_error(std::errc::result_out_of_range);
  1162. sv = sv.substr(0, conv.converted());
  1163. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1164. }
  1165. }
  1166. // widechar to utf8
  1167. {
  1168. auto sv = asio2::to_basic_string_view(w);
  1169. asio2::wstring_convert<asio2::codecvt_utf8<wchar_t, CharT>, wchar_t, CharT> conv;
  1170. try
  1171. {
  1172. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1173. }
  1174. catch (const std::range_error&)
  1175. {
  1176. set_last_error(std::errc::result_out_of_range);
  1177. sv = sv.substr(0, conv.converted());
  1178. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1179. }
  1180. }
  1181. }
  1182. /**
  1183. * @brief Converts utf8 characters to gbk characters.
  1184. * @param str - gbk characters
  1185. * @return Converted value as std::string.
  1186. */
  1187. template<class StringT>
  1188. inline auto utf8_to_gbk(const StringT& str, const std::string& locale_name = "chs") noexcept
  1189. {
  1190. using CharT = typename detail::char_type<StringT>::type;
  1191. clear_last_error();
  1192. std::wstring w;
  1193. std::codecvt_byname<wchar_t, char, std::mbstate_t>* c = nullptr;
  1194. try
  1195. {
  1196. c = new std::codecvt_byname<wchar_t, char, std::mbstate_t>(locale_name);
  1197. }
  1198. catch (const std::exception&)
  1199. {
  1200. set_last_error(std::errc::invalid_argument);
  1201. return std::string{};
  1202. }
  1203. // utf8 to widechar
  1204. {
  1205. auto sv = asio2::to_basic_string_view(str);
  1206. asio2::wstring_convert<asio2::codecvt_utf8<wchar_t, CharT>, wchar_t, CharT> conv;
  1207. try
  1208. {
  1209. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1210. }
  1211. catch (const std::range_error&)
  1212. {
  1213. set_last_error(std::errc::result_out_of_range);
  1214. sv = sv.substr(0, conv.converted());
  1215. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1216. }
  1217. }
  1218. // widechar to gbk
  1219. {
  1220. auto sv = asio2::to_basic_string_view(w);
  1221. asio2::wstring_convert<std::codecvt_byname<wchar_t, char, std::mbstate_t>> conv(c);
  1222. try
  1223. {
  1224. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1225. }
  1226. catch (const std::range_error&)
  1227. {
  1228. set_last_error(std::errc::result_out_of_range);
  1229. sv = sv.substr(0, conv.converted());
  1230. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1231. }
  1232. }
  1233. }
  1234. /**
  1235. * @brief Converts wide characters to multibyte characters.
  1236. * @param str - wide characters
  1237. * @param locale_name - locale name
  1238. * @return Converted value as std::string.
  1239. */
  1240. template<class StringT>
  1241. inline std::string wcstombs(const StringT& str, const std::string& locale_name = asio2::get_codecvt_locale()) noexcept
  1242. {
  1243. clear_last_error();
  1244. auto sv = asio2::to_basic_string_view(str);
  1245. std::codecvt_byname<wchar_t, char, std::mbstate_t>* c = nullptr;
  1246. try
  1247. {
  1248. c = new std::codecvt_byname<wchar_t, char, std::mbstate_t>(locale_name);
  1249. }
  1250. catch (const std::exception&)
  1251. {
  1252. set_last_error(std::errc::invalid_argument);
  1253. return std::string{};
  1254. }
  1255. asio2::wstring_convert<std::codecvt_byname<wchar_t, char, std::mbstate_t>> conv(c);
  1256. try
  1257. {
  1258. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1259. }
  1260. catch (const std::range_error&)
  1261. {
  1262. set_last_error(std::errc::result_out_of_range);
  1263. sv = sv.substr(0, conv.converted());
  1264. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1265. }
  1266. }
  1267. /**
  1268. * @brief Converts multibyte characters to wide characters.
  1269. * @param str - wide characters
  1270. * @param locale_name - locale name
  1271. * @return Converted value as std::wstring.
  1272. */
  1273. template<class StringT>
  1274. inline std::wstring mbstowcs(const StringT& str, const std::string& locale_name = asio2::get_codecvt_locale()) noexcept
  1275. {
  1276. clear_last_error();
  1277. auto sv = asio2::to_basic_string_view(str);
  1278. std::codecvt_byname<wchar_t, char, std::mbstate_t>* c = nullptr;
  1279. try
  1280. {
  1281. c = new std::codecvt_byname<wchar_t, char, std::mbstate_t>(locale_name);
  1282. }
  1283. catch (const std::exception&)
  1284. {
  1285. set_last_error(std::errc::invalid_argument);
  1286. return std::wstring{};
  1287. }
  1288. asio2::wstring_convert<std::codecvt_byname<wchar_t, char, std::mbstate_t>> conv(c);
  1289. try
  1290. {
  1291. return conv.from_bytes(sv.data(), sv.data() + sv.size());
  1292. }
  1293. catch (const std::range_error&)
  1294. {
  1295. set_last_error(std::errc::result_out_of_range);
  1296. sv = sv.substr(0, conv.converted());
  1297. return conv.from_bytes(sv.data(), sv.data() + sv.size());
  1298. }
  1299. }
  1300. /**
  1301. * @brief Converts utf8 characters to current default locale characters.
  1302. * @param str - utf8 characters
  1303. * @return Converted value as std::string.
  1304. */
  1305. template<class StringT>
  1306. inline std::string utf8_to_locale(const StringT& str) noexcept
  1307. {
  1308. using CharT = typename detail::char_type<StringT>::type;
  1309. clear_last_error();
  1310. std::wstring w;
  1311. std::codecvt_byname<wchar_t, char, std::mbstate_t>* c = nullptr;
  1312. try
  1313. {
  1314. c = new std::codecvt_byname<wchar_t, char, std::mbstate_t>(asio2::get_codecvt_locale());
  1315. }
  1316. catch (const std::exception&)
  1317. {
  1318. set_last_error(std::errc::invalid_argument);
  1319. return std::string{};
  1320. }
  1321. // utf8 to widechar
  1322. {
  1323. auto sv = asio2::to_basic_string_view(str);
  1324. asio2::wstring_convert<asio2::codecvt_utf8<wchar_t, CharT>, wchar_t, CharT> conv;
  1325. try
  1326. {
  1327. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1328. }
  1329. catch (const std::range_error&)
  1330. {
  1331. set_last_error(std::errc::result_out_of_range);
  1332. sv = sv.substr(0, conv.converted());
  1333. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1334. }
  1335. }
  1336. // widechar to locale
  1337. {
  1338. auto sv = asio2::to_basic_string_view(w);
  1339. asio2::wstring_convert<std::codecvt_byname<wchar_t, char, std::mbstate_t>> conv(c);
  1340. try
  1341. {
  1342. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1343. }
  1344. catch (const std::range_error&)
  1345. {
  1346. set_last_error(std::errc::result_out_of_range);
  1347. sv = sv.substr(0, conv.converted());
  1348. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1349. }
  1350. }
  1351. }
  1352. /**
  1353. * @brief Converts current default locale characters to utf8 characters.
  1354. * @param str - current default locale characters
  1355. * @return Converted value as std::string.
  1356. */
  1357. template<class StringT>
  1358. inline std::string locale_to_utf8(const StringT& str) noexcept
  1359. {
  1360. using CharT = typename detail::char_type<StringT>::type;
  1361. clear_last_error();
  1362. std::wstring w;
  1363. std::codecvt_byname<wchar_t, CharT, std::mbstate_t>* c = nullptr;
  1364. try
  1365. {
  1366. c = new std::codecvt_byname<wchar_t, CharT, std::mbstate_t>(asio2::get_codecvt_locale());
  1367. }
  1368. catch (const std::exception&)
  1369. {
  1370. set_last_error(std::errc::invalid_argument);
  1371. return std::string{};
  1372. }
  1373. // locale to widechar
  1374. {
  1375. auto sv = asio2::to_basic_string_view(str);
  1376. asio2::wstring_convert<std::codecvt_byname<wchar_t, CharT, std::mbstate_t>, wchar_t, CharT> conv(c);
  1377. try
  1378. {
  1379. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1380. }
  1381. catch (const std::range_error&)
  1382. {
  1383. set_last_error(std::errc::result_out_of_range);
  1384. sv = sv.substr(0, conv.converted());
  1385. w = conv.from_bytes(sv.data(), sv.data() + sv.size());
  1386. }
  1387. }
  1388. // widechar to utf8
  1389. {
  1390. auto sv = asio2::to_basic_string_view(w);
  1391. asio2::wstring_convert<asio2::codecvt_utf8<wchar_t, CharT>, wchar_t, CharT> conv;
  1392. try
  1393. {
  1394. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1395. }
  1396. catch (const std::range_error&)
  1397. {
  1398. set_last_error(std::errc::result_out_of_range);
  1399. sv = sv.substr(0, conv.converted());
  1400. return conv.to_bytes(sv.data(), sv.data() + sv.size());
  1401. }
  1402. }
  1403. }
  1404. }
  1405. #include <asio2/base/detail/pop_options.hpp>
  1406. #endif // !__ASIO2_CODECVT_HPP__