123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376 |
- /*=============================================================================
- Copyright (c) 2001-2011 Joel de Guzman
- Distributed under the Boost Software License, Version 1.0. (See accompanying
- file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- Autogenerated by MultiStageTable.py (Unicode multi-stage
- table builder) (c) Peter Kankowski, 2008
- ==============================================================================*/
- #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
- #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
- #include <boost/cstdint.hpp>
- # include "category_table.hpp"
- # include "script_table.hpp"
- # include "lowercase_table.hpp"
- # include "uppercase_table.hpp"
- namespace boost { namespace spirit { namespace ucd
- {
- // This header provides Basic (Level 1) Unicode Support
- // See http://unicode.org/reports/tr18/ for details
// Enumerations describing the Unicode properties exposed by this header.
// The struct has no data members; it only scopes the enumerator names.
struct properties
{
    // A general-category value is laid out as the bit pattern xxMMMCCC:
    //   MMM: major_category (the category value shifted right by three)
    //   CCC: position of the category inside its major category
    enum major_category
    {
        letter      = 0,
        mark        = 1,
        number      = 2,
        separator   = 3,
        other       = 4,
        punctuation = 5,
        symbol      = 6
    };

    // General categories. Each group starts at (major_category * 8).
    enum category
    {
        // letter
        uppercase_letter      = 0,  // [Lu] an uppercase letter
        lowercase_letter      = 1,  // [Ll] a lowercase letter
        titlecase_letter      = 2,  // [Lt] a digraphic character, with first part uppercase
        modifier_letter       = 3,  // [Lm] a modifier letter
        other_letter          = 4,  // [Lo] other letters, including syllables and ideographs

        // mark
        nonspacing_mark       = 8,  // [Mn] a nonspacing combining mark (zero advance width)
        enclosing_mark        = 9,  // [Me] an enclosing combining mark
        spacing_mark          = 10, // [Mc] a spacing combining mark (positive advance width)

        // number
        decimal_number        = 16, // [Nd] a decimal digit
        letter_number         = 17, // [Nl] a letterlike numeric character
        other_number          = 18, // [No] a numeric character of other type

        // separator
        space_separator       = 24, // [Zs] a space character (of various non-zero widths)
        line_separator        = 25, // [Zl] U+2028 LINE SEPARATOR only
        paragraph_separator   = 26, // [Zp] U+2029 PARAGRAPH SEPARATOR only

        // other
        control               = 32, // [Cc] a C0 or C1 control code
        format                = 33, // [Cf] a format control character
        private_use           = 34, // [Co] a private-use character
        surrogate             = 35, // [Cs] a surrogate code point
        unassigned            = 36, // [Cn] a reserved unassigned code point or a noncharacter

        // punctuation
        dash_punctuation      = 40, // [Pd] a dash or hyphen punctuation mark
        open_punctuation      = 41, // [Ps] an opening punctuation mark (of a pair)
        close_punctuation     = 42, // [Pe] a closing punctuation mark (of a pair)
        connector_punctuation = 43, // [Pc] a connecting punctuation mark, like a tie
        other_punctuation     = 44, // [Po] a punctuation mark of other type
        initial_punctuation   = 45, // [Pi] an initial quotation mark
        final_punctuation     = 46, // [Pf] a final quotation mark

        // symbol
        math_symbol           = 48, // [Sm] a symbol of primarily mathematical use
        currency_symbol       = 49, // [Sc] a currency sign
        modifier_symbol       = 50, // [Sk] a non-letterlike modifier symbol
        other_symbol          = 51  // [So] a symbol of other type
    };

    // Single-bit flags that sit above the six category bits in the value
    // returned by the category table lookup.
    enum derived_properties
    {
        alphabetic                   = 0x0040,
        uppercase                    = 0x0080,
        lowercase                    = 0x0100,
        white_space                  = 0x0200,
        hex_digit                    = 0x0400,
        noncharacter_code_point      = 0x0800,
        default_ignorable_code_point = 0x1000
    };

    // Script identifiers. The numeric value of each enumerator is what the
    // script table lookup yields, so the order below must not be changed.
    enum script
    {
        adlam = 0,
        caucasian_albanian,
        ahom,
        arabic,
        imperial_aramaic,
        armenian,
        avestan,
        balinese,
        bamum,
        bassa_vah,
        batak,
        bengali,
        bhaiksuki,
        bopomofo,
        brahmi,
        braille,
        buginese,
        buhid,
        chakma,
        canadian_aboriginal,
        carian,
        cham,
        cherokee,
        chorasmian,
        coptic,
        cypro_minoan,
        cypriot,
        cyrillic,
        devanagari,
        dives_akuru,
        dogra,
        deseret,
        duployan,
        egyptian_hieroglyphs,
        elbasan,
        elymaic,
        ethiopic,
        georgian,
        glagolitic,
        gunjala_gondi,
        masaram_gondi,
        gothic,
        grantha,
        greek,
        gujarati,
        gurmukhi,
        hangul,
        han,
        hanunoo,
        hatran,
        hebrew,
        hiragana,
        anatolian_hieroglyphs,
        pahawh_hmong,
        nyiakeng_puachue_hmong,
        katakana_or_hiragana,
        old_hungarian,
        old_italic,
        javanese,
        kayah_li,
        katakana,
        kawi,
        kharoshthi,
        khmer,
        khojki,
        khitan_small_script,
        kannada,
        kaithi,
        tai_tham,
        lao,
        latin,
        lepcha,
        limbu,
        linear_a,
        linear_b,
        lisu,
        lycian,
        lydian,
        mahajani,
        makasar,
        mandaic,
        manichaean,
        marchen,
        medefaidrin,
        mende_kikakui,
        meroitic_cursive,
        meroitic_hieroglyphs,
        malayalam,
        modi,
        mongolian,
        mro,
        meetei_mayek,
        multani,
        myanmar,
        nag_mundari,
        nandinagari,
        old_north_arabian,
        nabataean,
        newa,
        nko,
        nushu,
        ogham,
        ol_chiki,
        old_turkic,
        oriya,
        osage,
        osmanya,
        old_uyghur,
        palmyrene,
        pau_cin_hau,
        old_permic,
        phags_pa,
        inscriptional_pahlavi,
        psalter_pahlavi,
        phoenician,
        miao,
        inscriptional_parthian,
        rejang,
        hanifi_rohingya,
        runic,
        samaritan,
        old_south_arabian,
        saurashtra,
        signwriting,
        shavian,
        sharada,
        siddham,
        khudawadi,
        sinhala,
        sogdian,
        old_sogdian,
        sora_sompeng,
        soyombo,
        sundanese,
        syloti_nagri,
        syriac,
        tagbanwa,
        takri,
        tai_le,
        new_tai_lue,
        tamil,
        tangut,
        tai_viet,
        telugu,
        tifinagh,
        tagalog,
        thaana,
        thai,
        tibetan,
        tirhuta,
        tangsa,
        toto,
        ugaritic,
        vai,
        vithkuqi,
        warang_citi,
        wancho,
        old_persian,
        cuneiform,
        yezidi,
        yi,
        zanabazar_square,
        inherited,
        common,
        unknown
    };
};
- inline properties::category get_category(::boost::uint32_t ch)
- {
- return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
- }
- inline properties::major_category get_major_category(::boost::uint32_t ch)
- {
- return static_cast<properties::major_category>(get_category(ch) >> 3);
- }
- inline bool is_punctuation(::boost::uint32_t ch)
- {
- return get_major_category(ch) == properties::punctuation;
- }
- inline bool is_decimal_number(::boost::uint32_t ch)
- {
- return get_category(ch) == properties::decimal_number;
- }
- inline bool is_hex_digit(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::hex_digit) != 0;
- }
- inline bool is_control(::boost::uint32_t ch)
- {
- return get_category(ch) == properties::control;
- }
- inline bool is_alphabetic(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::alphabetic) != 0;
- }
- inline bool is_alphanumeric(::boost::uint32_t ch)
- {
- return is_decimal_number(ch) || is_alphabetic(ch);
- }
- inline bool is_uppercase(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::uppercase) != 0;
- }
- inline bool is_lowercase(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::lowercase) != 0;
- }
- inline bool is_white_space(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::white_space) != 0;
- }
- inline bool is_blank(::boost::uint32_t ch)
- {
- switch (ch)
- {
- case '\n': case '\v': case '\f': case '\r':
- return false;
- default:
- return is_white_space(ch)
- && !( get_category(ch) == properties::line_separator
- || get_category(ch) == properties::paragraph_separator
- );
- }
- }
- inline bool is_graph(::boost::uint32_t ch)
- {
- return !( is_white_space(ch)
- || get_category(ch) == properties::control
- || get_category(ch) == properties::surrogate
- || get_category(ch) == properties::unassigned
- );
- }
- inline bool is_print(::boost::uint32_t ch)
- {
- return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
- }
- inline bool is_noncharacter_code_point(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
- }
- inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
- }
- inline properties::script get_script(::boost::uint32_t ch)
- {
- return static_cast<properties::script>(detail::script_lookup(ch));
- }
- inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
- {
- // The table returns 0 to signal that this code maps to itself
- ::boost::uint32_t r = detail::lowercase_lookup(ch);
- return (r == 0)? ch : r;
- }
- inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
- {
- // The table returns 0 to signal that this code maps to itself
- ::boost::uint32_t r = detail::uppercase_lookup(ch);
- return (r == 0)? ch : r;
- }
- }}}
- #endif
|