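/*
 * Unicode character property API declarations (C).
 *
 * NOTE(review): the original file header comment (license / history) was
 * lost when this file was extracted from a numbered source listing; restore
 * it from the upstream source.
 */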
00023 #ifndef UCHAR_H
00024 #define UCHAR_H
00025
00026 #include "unicode/utypes.h"
00027
00028 U_CDECL_BEGIN
00029
00030
00031
00032
/** Unicode version string constant exposed by this header. */
#define U_UNICODE_VERSION "3.1.1"

/** Constant 0, the lowest value in the UCHAR_MIN_VALUE..UCHAR_MAX_VALUE range. */
#define UCHAR_MIN_VALUE 0

/** Constant 0x10ffff, the highest value in the UCHAR_MIN_VALUE..UCHAR_MAX_VALUE range. */
#define UCHAR_MAX_VALUE 0x10ffff

/**
 * Character category constants.
 *
 * Values are explicit and contiguous from 0 to 29. U_UNASSIGNED and
 * U_GENERAL_OTHER_TYPES intentionally share the value 0.
 * NOTE(review): the names appear to follow the Unicode general categories;
 * confirm the exact mapping against the Unicode Character Database.
 */
enum UCharCategory
{
    U_UNASSIGNED             = 0,
    /** Same value as U_UNASSIGNED. */
    U_GENERAL_OTHER_TYPES    = 0,
    U_UPPERCASE_LETTER       = 1,
    U_LOWERCASE_LETTER       = 2,
    U_TITLECASE_LETTER       = 3,
    U_MODIFIER_LETTER        = 4,
    U_OTHER_LETTER           = 5,
    U_NON_SPACING_MARK       = 6,
    U_ENCLOSING_MARK         = 7,
    U_COMBINING_SPACING_MARK = 8,
    U_DECIMAL_DIGIT_NUMBER   = 9,
    U_LETTER_NUMBER          = 10,
    U_OTHER_NUMBER           = 11,
    U_SPACE_SEPARATOR        = 12,
    U_LINE_SEPARATOR         = 13,
    U_PARAGRAPH_SEPARATOR    = 14,
    U_CONTROL_CHAR           = 15,
    U_FORMAT_CHAR            = 16,
    U_PRIVATE_USE_CHAR       = 17,
    U_SURROGATE              = 18,
    U_DASH_PUNCTUATION       = 19,
    U_START_PUNCTUATION      = 20,
    U_END_PUNCTUATION        = 21,
    U_CONNECTOR_PUNCTUATION  = 22,
    U_OTHER_PUNCTUATION      = 23,
    U_MATH_SYMBOL            = 24,
    U_CURRENCY_SYMBOL        = 25,
    U_MODIFIER_SYMBOL        = 26,
    U_OTHER_SYMBOL           = 27,
    U_INITIAL_PUNCTUATION    = 28,
    U_FINAL_PUNCTUATION      = 29,
    /** One past the highest category value (30); not a category itself. */
    U_CHAR_CATEGORY_COUNT
};

/** Lets C code write UCharCategory without the enum keyword. */
typedef enum UCharCategory UCharCategory;

/**
 * Character direction constants; this is the return type of
 * u_charDirection().
 *
 * Values are explicit and contiguous from 0 to 18.
 * NOTE(review): the names appear to follow the Unicode bidirectional
 * character types; confirm against the Unicode Bidirectional Algorithm.
 */
enum UCharDirection {
    U_LEFT_TO_RIGHT              = 0,
    U_RIGHT_TO_LEFT              = 1,
    U_EUROPEAN_NUMBER            = 2,
    U_EUROPEAN_NUMBER_SEPARATOR  = 3,
    U_EUROPEAN_NUMBER_TERMINATOR = 4,
    U_ARABIC_NUMBER              = 5,
    U_COMMON_NUMBER_SEPARATOR    = 6,
    U_BLOCK_SEPARATOR            = 7,
    U_SEGMENT_SEPARATOR          = 8,
    U_WHITE_SPACE_NEUTRAL        = 9,
    U_OTHER_NEUTRAL              = 10,
    U_LEFT_TO_RIGHT_EMBEDDING    = 11,
    U_LEFT_TO_RIGHT_OVERRIDE     = 12,
    U_RIGHT_TO_LEFT_ARABIC       = 13,
    U_RIGHT_TO_LEFT_EMBEDDING    = 14,
    U_RIGHT_TO_LEFT_OVERRIDE     = 15,
    U_POP_DIRECTIONAL_FORMAT     = 16,
    U_DIR_NON_SPACING_MARK       = 17,
    U_BOUNDARY_NEUTRAL           = 18,
    /** One past the highest direction value (19); not a direction itself. */
    U_CHAR_DIRECTION_COUNT
};

/** Lets C code write UCharDirection without the enum keyword. */
typedef enum UCharDirection UCharDirection;

/**
 * Block identifiers; this is the return type of ublock_getCode().
 *
 * Values start at 1 and are contiguous up to UBLOCK_TAGS (96).
 * For values 1..87 each UBLOCK_xxx constant has a U_xxx alias with the same
 * value; the alias is written in terms of its UBLOCK_xxx constant so the two
 * cannot drift apart. Values 88..96 have no U_xxx alias.
 * NOTE(review): the names appear to follow the Unicode block names; confirm
 * against the Unicode Character Database.
 */
enum UBlockCode {
    UBLOCK_BASIC_LATIN = 1,
    U_BASIC_LATIN = UBLOCK_BASIC_LATIN,

    UBLOCK_LATIN_1_SUPPLEMENT = 2,
    U_LATIN_1_SUPPLEMENT = UBLOCK_LATIN_1_SUPPLEMENT,

    UBLOCK_LATIN_EXTENDED_A = 3,
    U_LATIN_EXTENDED_A = UBLOCK_LATIN_EXTENDED_A,

    UBLOCK_LATIN_EXTENDED_B = 4,
    U_LATIN_EXTENDED_B = UBLOCK_LATIN_EXTENDED_B,

    UBLOCK_IPA_EXTENSIONS = 5,
    U_IPA_EXTENSIONS = UBLOCK_IPA_EXTENSIONS,

    UBLOCK_SPACING_MODIFIER_LETTERS = 6,
    U_SPACING_MODIFIER_LETTERS = UBLOCK_SPACING_MODIFIER_LETTERS,

    UBLOCK_COMBINING_DIACRITICAL_MARKS = 7,
    U_COMBINING_DIACRITICAL_MARKS = UBLOCK_COMBINING_DIACRITICAL_MARKS,

    UBLOCK_GREEK = 8,
    U_GREEK = UBLOCK_GREEK,

    UBLOCK_CYRILLIC = 9,
    U_CYRILLIC = UBLOCK_CYRILLIC,

    UBLOCK_ARMENIAN = 10,
    U_ARMENIAN = UBLOCK_ARMENIAN,

    UBLOCK_HEBREW = 11,
    U_HEBREW = UBLOCK_HEBREW,

    UBLOCK_ARABIC = 12,
    U_ARABIC = UBLOCK_ARABIC,

    UBLOCK_SYRIAC = 13,
    U_SYRIAC = UBLOCK_SYRIAC,

    UBLOCK_THAANA = 14,
    U_THAANA = UBLOCK_THAANA,

    UBLOCK_DEVANAGARI = 15,
    U_DEVANAGARI = UBLOCK_DEVANAGARI,

    UBLOCK_BENGALI = 16,
    U_BENGALI = UBLOCK_BENGALI,

    UBLOCK_GURMUKHI = 17,
    U_GURMUKHI = UBLOCK_GURMUKHI,

    UBLOCK_GUJARATI = 18,
    U_GUJARATI = UBLOCK_GUJARATI,

    UBLOCK_ORIYA = 19,
    U_ORIYA = UBLOCK_ORIYA,

    UBLOCK_TAMIL = 20,
    U_TAMIL = UBLOCK_TAMIL,

    UBLOCK_TELUGU = 21,
    U_TELUGU = UBLOCK_TELUGU,

    UBLOCK_KANNADA = 22,
    U_KANNADA = UBLOCK_KANNADA,

    UBLOCK_MALAYALAM = 23,
    U_MALAYALAM = UBLOCK_MALAYALAM,

    UBLOCK_SINHALA = 24,
    U_SINHALA = UBLOCK_SINHALA,

    UBLOCK_THAI = 25,
    U_THAI = UBLOCK_THAI,

    UBLOCK_LAO = 26,
    U_LAO = UBLOCK_LAO,

    UBLOCK_TIBETAN = 27,
    U_TIBETAN = UBLOCK_TIBETAN,

    UBLOCK_MYANMAR = 28,
    U_MYANMAR = UBLOCK_MYANMAR,

    UBLOCK_GEORGIAN = 29,
    U_GEORGIAN = UBLOCK_GEORGIAN,

    UBLOCK_HANGUL_JAMO = 30,
    U_HANGUL_JAMO = UBLOCK_HANGUL_JAMO,

    UBLOCK_ETHIOPIC = 31,
    U_ETHIOPIC = UBLOCK_ETHIOPIC,

    UBLOCK_CHEROKEE = 32,
    U_CHEROKEE = UBLOCK_CHEROKEE,

    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
    U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,

    UBLOCK_OGHAM = 34,
    U_OGHAM = UBLOCK_OGHAM,

    UBLOCK_RUNIC = 35,
    U_RUNIC = UBLOCK_RUNIC,

    UBLOCK_KHMER = 36,
    U_KHMER = UBLOCK_KHMER,

    UBLOCK_MONGOLIAN = 37,
    U_MONGOLIAN = UBLOCK_MONGOLIAN,

    UBLOCK_LATIN_EXTENDED_ADDITIONAL = 38,
    U_LATIN_EXTENDED_ADDITIONAL = UBLOCK_LATIN_EXTENDED_ADDITIONAL,

    UBLOCK_GREEK_EXTENDED = 39,
    U_GREEK_EXTENDED = UBLOCK_GREEK_EXTENDED,

    UBLOCK_GENERAL_PUNCTUATION = 40,
    U_GENERAL_PUNCTUATION = UBLOCK_GENERAL_PUNCTUATION,

    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS = 41,
    U_SUPERSCRIPTS_AND_SUBSCRIPTS = UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS,

    UBLOCK_CURRENCY_SYMBOLS = 42,
    U_CURRENCY_SYMBOLS = UBLOCK_CURRENCY_SYMBOLS,

    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS = 43,
    U_COMBINING_MARKS_FOR_SYMBOLS = UBLOCK_COMBINING_MARKS_FOR_SYMBOLS,

    UBLOCK_LETTERLIKE_SYMBOLS = 44,
    U_LETTERLIKE_SYMBOLS = UBLOCK_LETTERLIKE_SYMBOLS,

    UBLOCK_NUMBER_FORMS = 45,
    U_NUMBER_FORMS = UBLOCK_NUMBER_FORMS,

    UBLOCK_ARROWS = 46,
    U_ARROWS = UBLOCK_ARROWS,

    UBLOCK_MATHEMATICAL_OPERATORS = 47,
    U_MATHEMATICAL_OPERATORS = UBLOCK_MATHEMATICAL_OPERATORS,

    UBLOCK_MISCELLANEOUS_TECHNICAL = 48,
    U_MISCELLANEOUS_TECHNICAL = UBLOCK_MISCELLANEOUS_TECHNICAL,

    UBLOCK_CONTROL_PICTURES = 49,
    U_CONTROL_PICTURES = UBLOCK_CONTROL_PICTURES,

    UBLOCK_OPTICAL_CHARACTER_RECOGNITION = 50,
    U_OPTICAL_CHARACTER_RECOGNITION = UBLOCK_OPTICAL_CHARACTER_RECOGNITION,

    UBLOCK_ENCLOSED_ALPHANUMERICS = 51,
    U_ENCLOSED_ALPHANUMERICS = UBLOCK_ENCLOSED_ALPHANUMERICS,

    UBLOCK_BOX_DRAWING = 52,
    U_BOX_DRAWING = UBLOCK_BOX_DRAWING,

    UBLOCK_BLOCK_ELEMENTS = 53,
    U_BLOCK_ELEMENTS = UBLOCK_BLOCK_ELEMENTS,

    UBLOCK_GEOMETRIC_SHAPES = 54,
    U_GEOMETRIC_SHAPES = UBLOCK_GEOMETRIC_SHAPES,

    UBLOCK_MISCELLANEOUS_SYMBOLS = 55,
    U_MISCELLANEOUS_SYMBOLS = UBLOCK_MISCELLANEOUS_SYMBOLS,

    UBLOCK_DINGBATS = 56,
    U_DINGBATS = UBLOCK_DINGBATS,

    UBLOCK_BRAILLE_PATTERNS = 57,
    U_BRAILLE_PATTERNS = UBLOCK_BRAILLE_PATTERNS,

    UBLOCK_CJK_RADICALS_SUPPLEMENT = 58,
    U_CJK_RADICALS_SUPPLEMENT = UBLOCK_CJK_RADICALS_SUPPLEMENT,

    UBLOCK_KANGXI_RADICALS = 59,
    U_KANGXI_RADICALS = UBLOCK_KANGXI_RADICALS,

    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
    U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,

    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION = 61,
    U_CJK_SYMBOLS_AND_PUNCTUATION = UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION,

    UBLOCK_HIRAGANA = 62,
    U_HIRAGANA = UBLOCK_HIRAGANA,

    UBLOCK_KATAKANA = 63,
    U_KATAKANA = UBLOCK_KATAKANA,

    UBLOCK_BOPOMOFO = 64,
    U_BOPOMOFO = UBLOCK_BOPOMOFO,

    UBLOCK_HANGUL_COMPATIBILITY_JAMO = 65,
    U_HANGUL_COMPATIBILITY_JAMO = UBLOCK_HANGUL_COMPATIBILITY_JAMO,

    UBLOCK_KANBUN = 66,
    U_KANBUN = UBLOCK_KANBUN,

    UBLOCK_BOPOMOFO_EXTENDED = 67,
    U_BOPOMOFO_EXTENDED = UBLOCK_BOPOMOFO_EXTENDED,

    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
    U_ENCLOSED_CJK_LETTERS_AND_MONTHS = UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS,

    UBLOCK_CJK_COMPATIBILITY = 69,
    U_CJK_COMPATIBILITY = UBLOCK_CJK_COMPATIBILITY,

    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
    U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,

    UBLOCK_CJK_UNIFIED_IDEOGRAPHS = 71,
    U_CJK_UNIFIED_IDEOGRAPHS = UBLOCK_CJK_UNIFIED_IDEOGRAPHS,

    UBLOCK_YI_SYLLABLES = 72,
    U_YI_SYLLABLES = UBLOCK_YI_SYLLABLES,

    UBLOCK_YI_RADICALS = 73,
    U_YI_RADICALS = UBLOCK_YI_RADICALS,

    UBLOCK_HANGUL_SYLLABLES = 74,
    U_HANGUL_SYLLABLES = UBLOCK_HANGUL_SYLLABLES,

    UBLOCK_HIGH_SURROGATES = 75,
    U_HIGH_SURROGATES = UBLOCK_HIGH_SURROGATES,

    UBLOCK_HIGH_PRIVATE_USE_SURROGATES = 76,
    U_HIGH_PRIVATE_USE_SURROGATES = UBLOCK_HIGH_PRIVATE_USE_SURROGATES,

    UBLOCK_LOW_SURROGATES = 77,
    U_LOW_SURROGATES = UBLOCK_LOW_SURROGATES,

    /* Value 78 is reachable under three names. */
    UBLOCK_PRIVATE_USE = 78,
    UBLOCK_PRIVATE_USE_AREA = UBLOCK_PRIVATE_USE,
    U_PRIVATE_USE_AREA = UBLOCK_PRIVATE_USE,

    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS = 79,
    U_CJK_COMPATIBILITY_IDEOGRAPHS = UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS,

    UBLOCK_ALPHABETIC_PRESENTATION_FORMS = 80,
    U_ALPHABETIC_PRESENTATION_FORMS = UBLOCK_ALPHABETIC_PRESENTATION_FORMS,

    UBLOCK_ARABIC_PRESENTATION_FORMS_A = 81,
    U_ARABIC_PRESENTATION_FORMS_A = UBLOCK_ARABIC_PRESENTATION_FORMS_A,

    UBLOCK_COMBINING_HALF_MARKS = 82,
    U_COMBINING_HALF_MARKS = UBLOCK_COMBINING_HALF_MARKS,

    UBLOCK_CJK_COMPATIBILITY_FORMS = 83,
    U_CJK_COMPATIBILITY_FORMS = UBLOCK_CJK_COMPATIBILITY_FORMS,

    UBLOCK_SMALL_FORM_VARIANTS = 84,
    U_SMALL_FORM_VARIANTS = UBLOCK_SMALL_FORM_VARIANTS,

    UBLOCK_ARABIC_PRESENTATION_FORMS_B = 85,
    U_ARABIC_PRESENTATION_FORMS_B = UBLOCK_ARABIC_PRESENTATION_FORMS_B,

    UBLOCK_SPECIALS = 86,
    U_SPECIALS = UBLOCK_SPECIALS,

    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS = 87,
    U_HALFWIDTH_AND_FULLWIDTH_FORMS = UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS,

    /* The remaining blocks have no U_xxx alias. */
    UBLOCK_OLD_ITALIC = 88,
    UBLOCK_GOTHIC = 89,
    UBLOCK_DESERET = 90,
    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91,
    UBLOCK_MUSICAL_SYMBOLS = 92,
    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93,
    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94,
    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95,
    UBLOCK_TAGS = 96,

    /** One past the highest block value; not a block itself. */
    UBLOCK_COUNT = 97,
    U_SCRIPT_COUNT = UBLOCK_COUNT,

    /** Negative sentinel, outside the range of real block values. */
    UBLOCK_INVALID_CODE = -1,

    /* Further names for the same value as UBLOCK_COUNT. */
    U_CHAR_SCRIPT_COUNT = UBLOCK_COUNT,
    U_NO_SCRIPT = UBLOCK_COUNT
};

/** Lets C code write UBlockCode without the enum keyword. */
typedef enum UBlockCode UBlockCode;

/**
 * Cell width constants, values 0..3.
 * NOTE(review): presumably the values reported by u_charCellWidth(), which
 * is declared to return uint16_t rather than this enum - confirm.
 */
enum UCellWidth
{
    U_ZERO_WIDTH    = 0,
    U_HALF_WIDTH    = 1,
    U_FULL_WIDTH    = 2,
    U_NEUTRAL_WIDTH = 3,
    /** One past the highest width value (4); not a width itself. */
    U_CELL_WIDTH_COUNT
};

/** Lets C code write UCellWidth without the enum keyword. */
typedef enum UCellWidth UCellWidth;

/**
 * Selector passed as the nameChoice argument of u_charName(),
 * u_charFromName() and u_enumCharNames().
 *
 * The values were implicit in the original (0, 1, 2, 3); they are spelled
 * out here so that reordering cannot silently renumber them.
 */
enum UCharNameChoice {
    U_UNICODE_CHAR_NAME      = 0,
    U_UNICODE_10_CHAR_NAME   = 1,
    U_EXTENDED_CHAR_NAME     = 2,
    /** One past the highest selector value (3); not a selector itself. */
    U_CHAR_NAME_CHOICE_COUNT = 3
};

/** Lets C code write UCharNameChoice without the enum keyword. */
typedef enum UCharNameChoice UCharNameChoice;

00722 U_CAPI UBool U_EXPORT2
00723 u_islower(UChar32 c);
00724
00736 U_CAPI UBool U_EXPORT2
00737 u_isupper(UChar32 c);
00738
00750 U_CAPI UBool U_EXPORT2
00751 u_istitle(UChar32 c);
00752
00760 U_CAPI UBool U_EXPORT2
00761 u_isdigit(UChar32 c);
00762
00771 U_CAPI UBool U_EXPORT2
00772 u_isalnum(UChar32 c);
00773
00789 U_CAPI UBool U_EXPORT2
00790 u_isdefined(UChar32 c);
00791
00803 U_CAPI UBool U_EXPORT2
00804 u_isalpha(UChar32 c);
00805
00813 U_CAPI UBool U_EXPORT2
00814 u_isspace(UChar32 c);
00815
00844 U_CAPI UBool U_EXPORT2
00845 u_isWhitespace(UChar32 c);
00846
00862 U_CAPI UBool U_EXPORT2
00863 u_iscntrl(UChar32 c);
00864
00865
00876 U_CAPI UBool U_EXPORT2
00877 u_isprint(UChar32 c);
00878
00890 U_CAPI UBool U_EXPORT2
00891 u_isbase(UChar32 c);
00892
00902 U_CAPI UCharDirection U_EXPORT2
00903 u_charDirection(UChar32 c);
00904
00915 U_CAPI UBool U_EXPORT2
00916 u_isMirrored(UChar32 c);
00917
00934 U_CAPI UChar32 U_EXPORT2
00935 u_charMirror(UChar32 c);
00936
00988 U_CAPI uint16_t U_EXPORT2
00989 u_charCellWidth(UChar32 c);
00990
01001 U_CAPI int8_t U_EXPORT2
01002 u_charType(UChar32 c);
01003
01021 typedef UBool U_CALLCONV
01022 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
01023
01043 U_CAPI void U_EXPORT2
01044 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
01045
01053 U_CAPI uint8_t U_EXPORT2
01054 u_getCombiningClass(UChar32 c);
01055
01064 U_CAPI int32_t U_EXPORT2
01065 u_charDigitValue(UChar32 c);
01066
01073 U_CAPI UBlockCode U_EXPORT2
01074 ublock_getCode(UChar32 ch);
01075
01108 U_CAPI UTextOffset U_EXPORT2
01109 u_charName(UChar32 code, UCharNameChoice nameChoice,
01110 char *buffer, UTextOffset bufferLength,
01111 UErrorCode *pErrorCode);
01112
01132 U_CAPI UChar32 U_EXPORT2
01133 u_charFromName(UCharNameChoice nameChoice,
01134 const char *name,
01135 UErrorCode *pErrorCode);
01136
01153 typedef UBool UEnumCharNamesFn(void *context,
01154 UChar32 code,
01155 UCharNameChoice nameChoice,
01156 const char *name,
01157 UTextOffset length);
01158
01179 U_CAPI void U_EXPORT2
01180 u_enumCharNames(UChar32 start, UChar32 limit,
01181 UEnumCharNamesFn *fn,
01182 void *context,
01183 UCharNameChoice nameChoice,
01184 UErrorCode *pErrorCode);
01185
01202 U_CAPI UBool U_EXPORT2
01203 u_isIDStart(UChar32 c);
01204
01229 U_CAPI UBool U_EXPORT2
01230 u_isIDPart(UChar32 c);
01231
01256 U_CAPI UBool U_EXPORT2
01257 u_isIDIgnorable(UChar32 c);
01258
01279 U_CAPI UBool U_EXPORT2
01280 u_isJavaIDStart(UChar32 c);
01281
01310 U_CAPI UBool U_EXPORT2
01311 u_isJavaIDPart(UChar32 c);
01312
01335 U_CAPI UChar32 U_EXPORT2
01336 u_tolower(UChar32 c);
01337
01353 U_CAPI UChar32 U_EXPORT2
01354 u_toupper(UChar32 c);
01355
01370 U_CAPI UChar32 U_EXPORT2
01371 u_totitle(UChar32 c);
01372
01374 #define U_FOLD_CASE_DEFAULT 0
01375
01376 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
01377
01391 U_CAPI UChar32 U_EXPORT2
01392 u_foldCase(UChar32 c, uint32_t options);
01393
01429 U_CAPI int32_t U_EXPORT2
01430 u_digit(UChar32 ch, int8_t radix);
01431
01458 U_CAPI UChar32 U_EXPORT2
01459 u_forDigit(int32_t digit, int8_t radix);
01460
01468 U_CAPI void U_EXPORT2
01469 u_getUnicodeVersion(UVersionInfo info);
01470
01471
01475 #define u_charScript ublock_getCode
01476
01477 typedef UBlockCode UCharScript;
01478
01479 U_CDECL_END
01480
01481 #endif
01482