/*============================================================================= Copyright (c) 2001-2011 Joel de Guzman Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Autogenerated by MultiStageTable.py (Unicode multi-stage table builder) (c) Peter Kankowski, 2008 ==============================================================================*/ #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010) #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010 #include # include "category_table.hpp" # include "script_table.hpp" # include "lowercase_table.hpp" # include "uppercase_table.hpp" namespace boost { namespace spirit { namespace ucd { // This header provides Basic (Level 1) Unicode Support // See http://unicode.org/reports/tr18/ for details struct properties { // bit pattern: xxMMMCCC // MMM: major_category // CCC: category enum major_category { letter, mark, number, separator, other, punctuation, symbol }; enum category { uppercase_letter = 0, // [Lu] an uppercase letter lowercase_letter, // [Ll] a lowercase letter titlecase_letter, // [Lt] a digraphic character, with first part uppercase modifier_letter, // [Lm] a modifier letter other_letter, // [Lo] other letters, including syllables and ideographs nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width) enclosing_mark, // [Me] an enclosing combining mark spacing_mark, // [Mc] a spacing combining mark (positive advance width) decimal_number = 16, // [Nd] a decimal digit letter_number, // [Nl] a letterlike numeric character other_number, // [No] a numeric character of other type space_separator = 24, // [Zs] a space character (of various non-zero widths) line_separator, // [Zl] U+2028 LINE SEPARATOR only paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only control = 32, // [Cc] a C0 or C1 control code format, // [Cf] a format control character private_use, // [Co] a private-use character surrogate, // [Cs] a surrogate code point unassigned, // [Cn] a reserved unassigned code point or a noncharacter dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark open_punctuation, // [Ps] an opening punctuation mark (of a pair) close_punctuation, // [Pe] a closing punctuation mark (of a pair) connector_punctuation, // [Pc] a connecting punctuation mark, like a tie other_punctuation, // [Po] a punctuation mark of other type initial_punctuation, // [Pi] an initial quotation mark final_punctuation, // [Pf] a final quotation mark math_symbol = 48, // [Sm] a symbol of primarily mathematical use currency_symbol, // [Sc] a currency sign modifier_symbol, // [Sk] a non-letterlike modifier symbol other_symbol // [So] a symbol of other type }; enum derived_properties { alphabetic = 64, uppercase = 128, lowercase = 256, white_space = 512, hex_digit = 1024, noncharacter_code_point = 2048, default_ignorable_code_point = 4096 }; enum script { adlam, caucasian_albanian, ahom, arabic, imperial_aramaic, armenian, avestan, balinese, bamum, bassa_vah, batak, bengali, bhaiksuki, bopomofo, brahmi, braille, buginese, buhid, chakma, canadian_aboriginal, carian, cham, cherokee, chorasmian, coptic, cypro_minoan, cypriot, cyrillic, devanagari, dives_akuru, dogra, deseret, duployan, egyptian_hieroglyphs, elbasan, elymaic, ethiopic, georgian, glagolitic, gunjala_gondi, masaram_gondi, gothic, grantha, greek, gujarati, gurmukhi, hangul, han, hanunoo, hatran, hebrew, hiragana, anatolian_hieroglyphs, pahawh_hmong, nyiakeng_puachue_hmong, katakana_or_hiragana, old_hungarian, old_italic, javanese, kayah_li, katakana, kawi, kharoshthi, khmer, khojki, khitan_small_script, kannada, kaithi, tai_tham, lao, latin, lepcha, limbu, linear_a, linear_b, lisu, lycian, lydian, mahajani, makasar, mandaic, manichaean, marchen, medefaidrin, mende_kikakui, meroitic_cursive, meroitic_hieroglyphs, malayalam, modi, mongolian, mro, meetei_mayek, multani, myanmar, nag_mundari, nandinagari, old_north_arabian, nabataean, newa, nko, nushu, ogham, ol_chiki, old_turkic, oriya, osage, osmanya, old_uyghur, palmyrene, pau_cin_hau, old_permic, phags_pa, inscriptional_pahlavi, psalter_pahlavi, phoenician, miao, inscriptional_parthian, rejang, hanifi_rohingya, runic, samaritan, old_south_arabian, saurashtra, signwriting, shavian, sharada, siddham, khudawadi, sinhala, sogdian, old_sogdian, sora_sompeng, soyombo, sundanese, syloti_nagri, syriac, tagbanwa, takri, tai_le, new_tai_lue, tamil, tangut, tai_viet, telugu, tifinagh, tagalog, thaana, thai, tibetan, tirhuta, tangsa, toto, ugaritic, vai, vithkuqi, warang_citi, wancho, old_persian, cuneiform, yezidi, yi, zanabazar_square, inherited, common, unknown }; }; inline properties::category get_category(::boost::uint32_t ch) { return static_cast(detail::category_lookup(ch) & 0x3F); } inline properties::major_category get_major_category(::boost::uint32_t ch) { return static_cast(get_category(ch) >> 3); } inline bool is_punctuation(::boost::uint32_t ch) { return get_major_category(ch) == properties::punctuation; } inline bool is_decimal_number(::boost::uint32_t ch) { return get_category(ch) == properties::decimal_number; } inline bool is_hex_digit(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::hex_digit) != 0; } inline bool is_control(::boost::uint32_t ch) { return get_category(ch) == properties::control; } inline bool is_alphabetic(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::alphabetic) != 0; } inline bool is_alphanumeric(::boost::uint32_t ch) { return is_decimal_number(ch) || is_alphabetic(ch); } inline bool is_uppercase(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::uppercase) != 0; } inline bool is_lowercase(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::lowercase) != 0; } inline bool is_white_space(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::white_space) != 0; } inline bool is_blank(::boost::uint32_t ch) { switch (ch) { case '\n': case '\v': case '\f': case '\r': return false; default: return is_white_space(ch) && !( get_category(ch) == properties::line_separator || get_category(ch) == properties::paragraph_separator ); } } inline bool is_graph(::boost::uint32_t ch) { return !( is_white_space(ch) || get_category(ch) == properties::control || get_category(ch) == properties::surrogate || get_category(ch) == properties::unassigned ); } inline bool is_print(::boost::uint32_t ch) { return (is_graph(ch) || is_blank(ch)) && !is_control(ch); } inline bool is_noncharacter_code_point(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0; } inline bool is_default_ignorable_code_point(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0; } inline properties::script get_script(::boost::uint32_t ch) { return static_cast(detail::script_lookup(ch)); } inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch) { // The table returns 0 to signal that this code maps to itself ::boost::uint32_t r = detail::lowercase_lookup(ch); return (r == 0)? ch : r; } inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch) { // The table returns 0 to signal that this code maps to itself ::boost::uint32_t r = detail::uppercase_lookup(ch); return (r == 0)? ch : r; } }}} #endif