unicode.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. /*=============================================================================
  2. Copyright (c) 2001-2011 Hartmut Kaiser
  3. Copyright (c) 2001-2011 Joel de Guzman
  4. Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. =============================================================================*/
  7. #if !defined(BOOST_SPIRIT_UNICODE_1_JANUARY_12_2010_0728PM)
  8. #define BOOST_SPIRIT_UNICODE_1_JANUARY_12_2010_0728PM
  9. #if defined(_MSC_VER)
  10. #pragma once
  11. #endif
  12. #include <boost/cstdint.hpp>
  13. #include <boost/spirit/home/support/char_encoding/unicode/query.hpp>
  14. namespace boost { namespace spirit { namespace char_encoding
  15. {
  16. ///////////////////////////////////////////////////////////////////////////
// Test characters for specified conditions (using Unicode)
  18. ///////////////////////////////////////////////////////////////////////////
  19. struct unicode
  20. {
  21. #ifdef BOOST_NO_CXX11_CHAR32_T
  22. typedef ::boost::uint32_t char_type;
  23. #else
  24. typedef char32_t char_type;
  25. #endif
  26. typedef ::boost::uint32_t classify_type;
  27. ///////////////////////////////////////////////////////////////////////////
  28. // Posix stuff
  29. ///////////////////////////////////////////////////////////////////////////
  30. static bool
  31. isascii_(char_type ch)
  32. {
  33. return 0 == (ch & ~0x7f);
  34. }
  35. static bool
  36. ischar(char_type ch)
  37. {
  38. // unicode code points in the range 0x00 to 0x10FFFF
  39. return ch <= 0x10FFFF;
  40. }
  41. static bool
  42. isalnum(char_type ch)
  43. {
  44. return ucd::is_alphanumeric(ch);
  45. }
  46. static bool
  47. isalpha(char_type ch)
  48. {
  49. return ucd::is_alphabetic(ch);
  50. }
  51. static bool
  52. isdigit(char_type ch)
  53. {
  54. return ucd::is_decimal_number(ch);
  55. }
  56. static bool
  57. isxdigit(char_type ch)
  58. {
  59. return ucd::is_hex_digit(ch);
  60. }
  61. static bool
  62. iscntrl(char_type ch)
  63. {
  64. return ucd::is_control(ch);
  65. }
  66. static bool
  67. isgraph(char_type ch)
  68. {
  69. return ucd::is_graph(ch);
  70. }
  71. static bool
  72. islower(char_type ch)
  73. {
  74. return ucd::is_lowercase(ch);
  75. }
  76. static bool
  77. isprint(char_type ch)
  78. {
  79. return ucd::is_print(ch);
  80. }
  81. static bool
  82. ispunct(char_type ch)
  83. {
  84. return ucd::is_punctuation(ch);
  85. }
  86. static bool
  87. isspace(char_type ch)
  88. {
  89. return ucd::is_white_space(ch);
  90. }
  91. static bool
  92. isblank BOOST_PREVENT_MACRO_SUBSTITUTION (char_type ch)
  93. {
  94. return ucd::is_blank(ch);
  95. }
  96. static bool
  97. isupper(char_type ch)
  98. {
  99. return ucd::is_uppercase(ch);
  100. }
  101. ///////////////////////////////////////////////////////////////////////////
  102. // Simple character conversions
  103. ///////////////////////////////////////////////////////////////////////////
  104. static char_type
  105. tolower(char_type ch)
  106. {
  107. return ucd::to_lowercase(ch);
  108. }
  109. static char_type
  110. toupper(char_type ch)
  111. {
  112. return ucd::to_uppercase(ch);
  113. }
  114. static ::boost::uint32_t
  115. toucs4(char_type ch)
  116. {
  117. return ch;
  118. }
  119. ///////////////////////////////////////////////////////////////////////////
  120. // Major Categories
  121. ///////////////////////////////////////////////////////////////////////////
  122. #define BOOST_SPIRIT_MAJOR_CATEGORY(name) \
  123. static bool \
  124. is_##name(char_type ch) \
  125. { \
  126. return ucd::get_major_category(ch) == ucd::properties::name; \
  127. } \
  128. /***/
  129. BOOST_SPIRIT_MAJOR_CATEGORY(letter)
  130. BOOST_SPIRIT_MAJOR_CATEGORY(mark)
  131. BOOST_SPIRIT_MAJOR_CATEGORY(number)
  132. BOOST_SPIRIT_MAJOR_CATEGORY(separator)
  133. BOOST_SPIRIT_MAJOR_CATEGORY(other)
  134. BOOST_SPIRIT_MAJOR_CATEGORY(punctuation)
  135. BOOST_SPIRIT_MAJOR_CATEGORY(symbol)
  136. ///////////////////////////////////////////////////////////////////////////
  137. // General Categories
  138. ///////////////////////////////////////////////////////////////////////////
  139. #define BOOST_SPIRIT_CATEGORY(name) \
  140. static bool \
  141. is_##name(char_type ch) \
  142. { \
  143. return ucd::get_category(ch) == ucd::properties::name; \
  144. } \
  145. /***/
  146. BOOST_SPIRIT_CATEGORY(uppercase_letter)
  147. BOOST_SPIRIT_CATEGORY(lowercase_letter)
  148. BOOST_SPIRIT_CATEGORY(titlecase_letter)
  149. BOOST_SPIRIT_CATEGORY(modifier_letter)
  150. BOOST_SPIRIT_CATEGORY(other_letter)
  151. BOOST_SPIRIT_CATEGORY(nonspacing_mark)
  152. BOOST_SPIRIT_CATEGORY(enclosing_mark)
  153. BOOST_SPIRIT_CATEGORY(spacing_mark)
  154. BOOST_SPIRIT_CATEGORY(decimal_number)
  155. BOOST_SPIRIT_CATEGORY(letter_number)
  156. BOOST_SPIRIT_CATEGORY(other_number)
  157. BOOST_SPIRIT_CATEGORY(space_separator)
  158. BOOST_SPIRIT_CATEGORY(line_separator)
  159. BOOST_SPIRIT_CATEGORY(paragraph_separator)
  160. BOOST_SPIRIT_CATEGORY(control)
  161. BOOST_SPIRIT_CATEGORY(format)
  162. BOOST_SPIRIT_CATEGORY(private_use)
  163. BOOST_SPIRIT_CATEGORY(surrogate)
  164. BOOST_SPIRIT_CATEGORY(unassigned)
  165. BOOST_SPIRIT_CATEGORY(dash_punctuation)
  166. BOOST_SPIRIT_CATEGORY(open_punctuation)
  167. BOOST_SPIRIT_CATEGORY(close_punctuation)
  168. BOOST_SPIRIT_CATEGORY(connector_punctuation)
  169. BOOST_SPIRIT_CATEGORY(other_punctuation)
  170. BOOST_SPIRIT_CATEGORY(initial_punctuation)
  171. BOOST_SPIRIT_CATEGORY(final_punctuation)
  172. BOOST_SPIRIT_CATEGORY(math_symbol)
  173. BOOST_SPIRIT_CATEGORY(currency_symbol)
  174. BOOST_SPIRIT_CATEGORY(modifier_symbol)
  175. BOOST_SPIRIT_CATEGORY(other_symbol)
  176. ///////////////////////////////////////////////////////////////////////////
  177. // Derived Categories
  178. ///////////////////////////////////////////////////////////////////////////
  179. #define BOOST_SPIRIT_DERIVED_CATEGORY(name) \
  180. static bool \
  181. is_##name(char_type ch) \
  182. { \
  183. return ucd::is_##name(ch); \
  184. } \
  185. /***/
  186. BOOST_SPIRIT_DERIVED_CATEGORY(alphabetic)
  187. BOOST_SPIRIT_DERIVED_CATEGORY(uppercase)
  188. BOOST_SPIRIT_DERIVED_CATEGORY(lowercase)
  189. BOOST_SPIRIT_DERIVED_CATEGORY(white_space)
  190. BOOST_SPIRIT_DERIVED_CATEGORY(hex_digit)
  191. BOOST_SPIRIT_DERIVED_CATEGORY(noncharacter_code_point)
  192. BOOST_SPIRIT_DERIVED_CATEGORY(default_ignorable_code_point)
  193. ///////////////////////////////////////////////////////////////////////////
  194. // Scripts
  195. ///////////////////////////////////////////////////////////////////////////
  196. #define BOOST_SPIRIT_SCRIPT(name) \
  197. static bool \
  198. is_##name(char_type ch) \
  199. { \
  200. return ucd::get_script(ch) == ucd::properties::name; \
  201. } \
  202. /***/
  203. BOOST_SPIRIT_SCRIPT(adlam)
  204. BOOST_SPIRIT_SCRIPT(caucasian_albanian)
  205. BOOST_SPIRIT_SCRIPT(ahom)
  206. BOOST_SPIRIT_SCRIPT(arabic)
  207. BOOST_SPIRIT_SCRIPT(imperial_aramaic)
  208. BOOST_SPIRIT_SCRIPT(armenian)
  209. BOOST_SPIRIT_SCRIPT(avestan)
  210. BOOST_SPIRIT_SCRIPT(balinese)
  211. BOOST_SPIRIT_SCRIPT(bamum)
  212. BOOST_SPIRIT_SCRIPT(bassa_vah)
  213. BOOST_SPIRIT_SCRIPT(batak)
  214. BOOST_SPIRIT_SCRIPT(bengali)
  215. BOOST_SPIRIT_SCRIPT(bhaiksuki)
  216. BOOST_SPIRIT_SCRIPT(bopomofo)
  217. BOOST_SPIRIT_SCRIPT(brahmi)
  218. BOOST_SPIRIT_SCRIPT(braille)
  219. BOOST_SPIRIT_SCRIPT(buginese)
  220. BOOST_SPIRIT_SCRIPT(buhid)
  221. BOOST_SPIRIT_SCRIPT(chakma)
  222. BOOST_SPIRIT_SCRIPT(canadian_aboriginal)
  223. BOOST_SPIRIT_SCRIPT(carian)
  224. BOOST_SPIRIT_SCRIPT(cham)
  225. BOOST_SPIRIT_SCRIPT(cherokee)
  226. BOOST_SPIRIT_SCRIPT(chorasmian)
  227. BOOST_SPIRIT_SCRIPT(coptic)
  228. BOOST_SPIRIT_SCRIPT(cypro_minoan)
  229. BOOST_SPIRIT_SCRIPT(cypriot)
  230. BOOST_SPIRIT_SCRIPT(cyrillic)
  231. BOOST_SPIRIT_SCRIPT(devanagari)
  232. BOOST_SPIRIT_SCRIPT(dives_akuru)
  233. BOOST_SPIRIT_SCRIPT(dogra)
  234. BOOST_SPIRIT_SCRIPT(deseret)
  235. BOOST_SPIRIT_SCRIPT(duployan)
  236. BOOST_SPIRIT_SCRIPT(egyptian_hieroglyphs)
  237. BOOST_SPIRIT_SCRIPT(elbasan)
  238. BOOST_SPIRIT_SCRIPT(elymaic)
  239. BOOST_SPIRIT_SCRIPT(ethiopic)
  240. BOOST_SPIRIT_SCRIPT(georgian)
  241. BOOST_SPIRIT_SCRIPT(glagolitic)
  242. BOOST_SPIRIT_SCRIPT(gunjala_gondi)
  243. BOOST_SPIRIT_SCRIPT(masaram_gondi)
  244. BOOST_SPIRIT_SCRIPT(gothic)
  245. BOOST_SPIRIT_SCRIPT(grantha)
  246. BOOST_SPIRIT_SCRIPT(greek)
  247. BOOST_SPIRIT_SCRIPT(gujarati)
  248. BOOST_SPIRIT_SCRIPT(gurmukhi)
  249. BOOST_SPIRIT_SCRIPT(hangul)
  250. BOOST_SPIRIT_SCRIPT(han)
  251. BOOST_SPIRIT_SCRIPT(hanunoo)
  252. BOOST_SPIRIT_SCRIPT(hatran)
  253. BOOST_SPIRIT_SCRIPT(hebrew)
  254. BOOST_SPIRIT_SCRIPT(hiragana)
  255. BOOST_SPIRIT_SCRIPT(anatolian_hieroglyphs)
  256. BOOST_SPIRIT_SCRIPT(pahawh_hmong)
  257. BOOST_SPIRIT_SCRIPT(nyiakeng_puachue_hmong)
  258. BOOST_SPIRIT_SCRIPT(katakana_or_hiragana)
  259. BOOST_SPIRIT_SCRIPT(old_hungarian)
  260. BOOST_SPIRIT_SCRIPT(old_italic)
  261. BOOST_SPIRIT_SCRIPT(javanese)
  262. BOOST_SPIRIT_SCRIPT(kayah_li)
  263. BOOST_SPIRIT_SCRIPT(katakana)
  264. BOOST_SPIRIT_SCRIPT(kawi)
  265. BOOST_SPIRIT_SCRIPT(kharoshthi)
  266. BOOST_SPIRIT_SCRIPT(khmer)
  267. BOOST_SPIRIT_SCRIPT(khojki)
  268. BOOST_SPIRIT_SCRIPT(khitan_small_script)
  269. BOOST_SPIRIT_SCRIPT(kannada)
  270. BOOST_SPIRIT_SCRIPT(kaithi)
  271. BOOST_SPIRIT_SCRIPT(tai_tham)
  272. BOOST_SPIRIT_SCRIPT(lao)
  273. BOOST_SPIRIT_SCRIPT(latin)
  274. BOOST_SPIRIT_SCRIPT(lepcha)
  275. BOOST_SPIRIT_SCRIPT(limbu)
  276. BOOST_SPIRIT_SCRIPT(linear_a)
  277. BOOST_SPIRIT_SCRIPT(linear_b)
  278. BOOST_SPIRIT_SCRIPT(lisu)
  279. BOOST_SPIRIT_SCRIPT(lycian)
  280. BOOST_SPIRIT_SCRIPT(lydian)
  281. BOOST_SPIRIT_SCRIPT(mahajani)
  282. BOOST_SPIRIT_SCRIPT(makasar)
  283. BOOST_SPIRIT_SCRIPT(mandaic)
  284. BOOST_SPIRIT_SCRIPT(manichaean)
  285. BOOST_SPIRIT_SCRIPT(marchen)
  286. BOOST_SPIRIT_SCRIPT(medefaidrin)
  287. BOOST_SPIRIT_SCRIPT(mende_kikakui)
  288. BOOST_SPIRIT_SCRIPT(meroitic_cursive)
  289. BOOST_SPIRIT_SCRIPT(meroitic_hieroglyphs)
  290. BOOST_SPIRIT_SCRIPT(malayalam)
  291. BOOST_SPIRIT_SCRIPT(modi)
  292. BOOST_SPIRIT_SCRIPT(mongolian)
  293. BOOST_SPIRIT_SCRIPT(mro)
  294. BOOST_SPIRIT_SCRIPT(meetei_mayek)
  295. BOOST_SPIRIT_SCRIPT(multani)
  296. BOOST_SPIRIT_SCRIPT(myanmar)
  297. BOOST_SPIRIT_SCRIPT(nag_mundari)
  298. BOOST_SPIRIT_SCRIPT(nandinagari)
  299. BOOST_SPIRIT_SCRIPT(old_north_arabian)
  300. BOOST_SPIRIT_SCRIPT(nabataean)
  301. BOOST_SPIRIT_SCRIPT(newa)
  302. BOOST_SPIRIT_SCRIPT(nko)
  303. BOOST_SPIRIT_SCRIPT(nushu)
  304. BOOST_SPIRIT_SCRIPT(ogham)
  305. BOOST_SPIRIT_SCRIPT(ol_chiki)
  306. BOOST_SPIRIT_SCRIPT(old_turkic)
  307. BOOST_SPIRIT_SCRIPT(oriya)
  308. BOOST_SPIRIT_SCRIPT(osage)
  309. BOOST_SPIRIT_SCRIPT(osmanya)
  310. BOOST_SPIRIT_SCRIPT(old_uyghur)
  311. BOOST_SPIRIT_SCRIPT(palmyrene)
  312. BOOST_SPIRIT_SCRIPT(pau_cin_hau)
  313. BOOST_SPIRIT_SCRIPT(old_permic)
  314. BOOST_SPIRIT_SCRIPT(phags_pa)
  315. BOOST_SPIRIT_SCRIPT(inscriptional_pahlavi)
  316. BOOST_SPIRIT_SCRIPT(psalter_pahlavi)
  317. BOOST_SPIRIT_SCRIPT(phoenician)
  318. BOOST_SPIRIT_SCRIPT(miao)
  319. BOOST_SPIRIT_SCRIPT(inscriptional_parthian)
  320. BOOST_SPIRIT_SCRIPT(rejang)
  321. BOOST_SPIRIT_SCRIPT(hanifi_rohingya)
  322. BOOST_SPIRIT_SCRIPT(runic)
  323. BOOST_SPIRIT_SCRIPT(samaritan)
  324. BOOST_SPIRIT_SCRIPT(old_south_arabian)
  325. BOOST_SPIRIT_SCRIPT(saurashtra)
  326. BOOST_SPIRIT_SCRIPT(signwriting)
  327. BOOST_SPIRIT_SCRIPT(shavian)
  328. BOOST_SPIRIT_SCRIPT(sharada)
  329. BOOST_SPIRIT_SCRIPT(siddham)
  330. BOOST_SPIRIT_SCRIPT(khudawadi)
  331. BOOST_SPIRIT_SCRIPT(sinhala)
  332. BOOST_SPIRIT_SCRIPT(sogdian)
  333. BOOST_SPIRIT_SCRIPT(old_sogdian)
  334. BOOST_SPIRIT_SCRIPT(sora_sompeng)
  335. BOOST_SPIRIT_SCRIPT(soyombo)
  336. BOOST_SPIRIT_SCRIPT(sundanese)
  337. BOOST_SPIRIT_SCRIPT(syloti_nagri)
  338. BOOST_SPIRIT_SCRIPT(syriac)
  339. BOOST_SPIRIT_SCRIPT(tagbanwa)
  340. BOOST_SPIRIT_SCRIPT(takri)
  341. BOOST_SPIRIT_SCRIPT(tai_le)
  342. BOOST_SPIRIT_SCRIPT(new_tai_lue)
  343. BOOST_SPIRIT_SCRIPT(tamil)
  344. BOOST_SPIRIT_SCRIPT(tangut)
  345. BOOST_SPIRIT_SCRIPT(tai_viet)
  346. BOOST_SPIRIT_SCRIPT(telugu)
  347. BOOST_SPIRIT_SCRIPT(tifinagh)
  348. BOOST_SPIRIT_SCRIPT(tagalog)
  349. BOOST_SPIRIT_SCRIPT(thaana)
  350. BOOST_SPIRIT_SCRIPT(thai)
  351. BOOST_SPIRIT_SCRIPT(tibetan)
  352. BOOST_SPIRIT_SCRIPT(tirhuta)
  353. BOOST_SPIRIT_SCRIPT(tangsa)
  354. BOOST_SPIRIT_SCRIPT(toto)
  355. BOOST_SPIRIT_SCRIPT(ugaritic)
  356. BOOST_SPIRIT_SCRIPT(vai)
  357. BOOST_SPIRIT_SCRIPT(vithkuqi)
  358. BOOST_SPIRIT_SCRIPT(warang_citi)
  359. BOOST_SPIRIT_SCRIPT(wancho)
  360. BOOST_SPIRIT_SCRIPT(old_persian)
  361. BOOST_SPIRIT_SCRIPT(cuneiform)
  362. BOOST_SPIRIT_SCRIPT(yezidi)
  363. BOOST_SPIRIT_SCRIPT(yi)
  364. BOOST_SPIRIT_SCRIPT(zanabazar_square)
  365. BOOST_SPIRIT_SCRIPT(inherited)
  366. BOOST_SPIRIT_SCRIPT(common)
  367. BOOST_SPIRIT_SCRIPT(unknown)
  368. #undef BOOST_SPIRIT_MAJOR_CATEGORY
  369. #undef BOOST_SPIRIT_CATEGORY
  370. #undef BOOST_SPIRIT_DERIVED_CATEGORY
  371. #undef BOOST_SPIRIT_SCRIPT
  372. };
  373. }}}
  374. #endif