00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef UNICODE_H
00028 #define UNICODE_H
00029
00030 #include "unicode/utypes.h"
00031 #include "unicode/uchar.h"
00032
00033 U_NAMESPACE_BEGIN
00055 class U_COMMON_API Unicode
00056 {
00057 public:
00058
00059
00060
00061
00062
00063
00064
00065 enum {
00067 MIN_VALUE=0,
00068
00074 MAX_VALUE=0x10ffff,
00075
00083 MAX_CHAR_LENGTH=UTF_MAX_CHAR_LENGTH,
00084
00095 MIN_RADIX=2,
00096
00107 MAX_RADIX=36
00108 };
00109
00116 enum EUnicodeGeneralTypes
00117 {
00118 UNASSIGNED = 0,
00119 UPPERCASE_LETTER = 1,
00120 LOWERCASE_LETTER = 2,
00121 TITLECASE_LETTER = 3,
00122 MODIFIER_LETTER = 4,
00123 OTHER_LETTER = 5,
00124 NON_SPACING_MARK = 6,
00125 ENCLOSING_MARK = 7,
00126 COMBINING_SPACING_MARK = 8,
00127 DECIMAL_DIGIT_NUMBER = 9,
00128 LETTER_NUMBER = 10,
00129 OTHER_NUMBER = 11,
00130 SPACE_SEPARATOR = 12,
00131 LINE_SEPARATOR = 13,
00132 PARAGRAPH_SEPARATOR = 14,
00133 CONTROL = 15,
00134 FORMAT = 16,
00135 PRIVATE_USE = 17,
00136 SURROGATE = 18,
00137 DASH_PUNCTUATION = 19,
00138 START_PUNCTUATION = 20,
00139 END_PUNCTUATION = 21,
00140 CONNECTOR_PUNCTUATION = 22,
00141 OTHER_PUNCTUATION = 23,
00142 MATH_SYMBOL = 24,
00143 CURRENCY_SYMBOL = 25,
00144 MODIFIER_SYMBOL = 26,
00145 OTHER_SYMBOL = 27,
00146 INITIAL_PUNCTUATION = 28,
00147 FINAL_PUNCTUATION = 29,
00148 GENERAL_TYPES_COUNT = 30
00149 };
00150
00151
00157 enum EUnicodeScript
00158 {
00159 kBasicLatin=UBLOCK_BASIC_LATIN,
00160 kLatin1Supplement,
00161 kLatinExtendedA,
00162 kLatinExtendedB,
00163 kIPAExtension,
00164 kSpacingModifier,
00165 kCombiningDiacritical,
00166 kGreek,
00167 kCyrillic,
00168 kArmenian,
00169 kHebrew,
00170 kArabic,
00171 kSyriac,
00172 kThaana,
00173 kDevanagari,
00174 kBengali,
00175 kGurmukhi,
00176 kGujarati,
00177 kOriya,
00178 kTamil,
00179 kTelugu,
00180 kKannada,
00181 kMalayalam,
00182 kSinhala,
00183 kThai,
00184 kLao,
00185 kTibetan,
00186 kMyanmar,
00187 kGeorgian,
00188 kHangulJamo,
00189 kEthiopic,
00190 kCherokee,
00191 kUnifiedCanadianAboriginalSyllabics,
00192 kogham,
00193 kRunic,
00194 kKhmer,
00195 kMongolian,
00196 kLatinExtendedAdditional,
00197 kGreekExtended,
00198 kGeneralPunctuation,
00199 kSuperSubScript,
00200 kCurrencySymbolScript,
00201 kSymbolCombiningMark,
00202 kLetterlikeSymbol,
00203 kNumberForm,
00204 kArrow,
00205 kMathOperator,
00206 kMiscTechnical,
00207 kControlPicture,
00208 kOpticalCharacter,
00209 kEnclosedAlphanumeric,
00210 kBoxDrawing,
00211 kBlockElement,
00212 kGeometricShape,
00213 kMiscSymbol,
00214 kDingbat,
00215 kBraillePatterns,
00216 kCJKRadicalsSupplement,
00217 kKangxiRadicals,
00218 kIdeographicDescriptionCharacters,
00219 kCJKSymbolPunctuation,
00220 kHiragana,
00221 kKatakana,
00222 kBopomofo,
00223 kHangulCompatibilityJamo,
00224 kKanbun,
00225 kBopomofoExtended,
00226 kEnclosedCJKLetterMonth,
00227 kCJKCompatibility,
00228 kCJKUnifiedIdeographExtensionA,
00229 kCJKUnifiedIdeograph,
00230 kYiSyllables,
00231 kYiRadicals,
00232 kHangulSyllable,
00233 kHighSurrogate,
00234 kHighPrivateUseSurrogate,
00235 kLowSurrogate,
00236 kPrivateUse,
00237 kCJKCompatibilityIdeograph,
00238 kAlphabeticPresentation,
00239 kArabicPresentationA,
00240 kCombiningHalfMark,
00241 kCJKCompatibilityForm,
00242 kSmallFormVariant,
00243 kArabicPresentationB,
00244 kNoScript,
00245 kHalfwidthFullwidthForm,
00246 kScriptCount=UBLOCK_COUNT
00247 };
00248
00254 enum EDirectionProperty {
00255 LEFT_TO_RIGHT = 0,
00256 RIGHT_TO_LEFT = 1,
00257 EUROPEAN_NUMBER = 2,
00258 EUROPEAN_NUMBER_SEPARATOR = 3,
00259 EUROPEAN_NUMBER_TERMINATOR = 4,
00260 ARABIC_NUMBER = 5,
00261 COMMON_NUMBER_SEPARATOR = 6,
00262 BLOCK_SEPARATOR = 7,
00263 SEGMENT_SEPARATOR = 8,
00264 WHITE_SPACE_NEUTRAL = 9,
00265 OTHER_NEUTRAL = 10,
00266 LEFT_TO_RIGHT_EMBEDDING = 11,
00267 LEFT_TO_RIGHT_OVERRIDE = 12,
00268 RIGHT_TO_LEFT_ARABIC = 13,
00269 RIGHT_TO_LEFT_EMBEDDING = 14,
00270 RIGHT_TO_LEFT_OVERRIDE = 15,
00271 POP_DIRECTIONAL_FORMAT = 16,
00272 DIR_NON_SPACING_MARK = 17,
00273 BOUNDARY_NEUTRAL = 18
00274 };
00275
00282 enum ECellWidths
00283 {
00284 ZERO_WIDTH = 0,
00285 HALF_WIDTH = 1,
00286 FULL_WIDTH = 2,
00287 NEUTRAL = 3
00288 };
00289
00301 static inline UBool isSingle(UChar c);
00302
00312 static inline UBool isLead(UChar c);
00313
00323 static inline UBool isTrail(UChar c);
00324
00336 static inline UBool isSurrogate(UChar32 c);
00337
00351 static inline UBool isUnicodeChar(UChar32 c);
00352
00365 static inline UBool isError(UChar32 c);
00366
00377 static inline UBool isValid(UChar32 c);
00378
00391 static inline UBool needMultipleUChar(UChar32 c);
00392
00402 static inline int32_t charLength(UChar32 c);
00403
00418 static inline int32_t arraySize(int32_t size);
00419
00433 static inline UBool isLowerCase(UChar32 ch);
00434
00447 static inline UBool isUpperCase(UChar32 ch);
00448
00461 static inline UBool isTitleCase(UChar32 ch);
00462
00475 static inline UBool isDigit(UChar32 ch);
00476
00493 static inline UBool isDefined(UChar32 ch);
00494
00506 static inline UBool isControl(UChar32 ch);
00507
00519 static inline UBool isPrintable(UChar32 ch);
00520
00533 static inline UBool isBaseForm(UChar32 ch);
00534
00551 static inline UBool isLetter(UChar32 ch);
00552
00574 static inline UBool isJavaIdentifierStart(UChar32 ch);
00575
00605 static inline UBool isJavaIdentifierPart(UChar32 ch);
00606
00622 static inline UBool isUnicodeIdentifierStart(UChar32 ch);
00623
00651 static inline UBool isUnicodeIdentifierPart(UChar32 ch);
00652
00679 static inline UBool isIdentifierIgnorable(UChar32 ch);
00680
00706 static inline UChar32 toLowerCase(UChar32 ch);
00707
00730 static inline UChar32 toUpperCase(UChar32 ch);
00731
00750 static inline UChar32 toTitleCase(UChar32 ch);
00751
00766 static inline UChar32
00767 foldCase(UChar32 c, uint32_t options);
00768
00778 static inline UBool isSpaceChar(UChar32 ch);
00779
00809 static inline UBool isWhitespace(UChar32 ch);
00810
00846 static inline int8_t getType(UChar32 ch);
00847
00856 static inline uint8_t getCombiningClass(UChar32 c);
00857
00868 static inline EDirectionProperty characterDirection(UChar32 ch);
00869
00881 static inline UBool isMirrored(UChar32 c);
00882
00900 static inline UChar32 charMirror(UChar32 c);
00901
00907 static inline EUnicodeScript getScript(UChar32 ch);
00908
00961 static inline uint16_t getCellWidth(UChar32 ch);
00962
00991 static inline UTextOffset
00992 getCharName(uint32_t code,
00993 char *buffer, UTextOffset bufferLength,
00994 UCharNameChoice nameChoice=U_UNICODE_CHAR_NAME);
00995
01007 static inline int32_t digitValue(UChar32 ch);
01008
01047 static inline int32_t digit(UChar32 ch, int8_t radix);
01048
01077 static inline UChar32 forDigit(int32_t digit, int8_t radix);
01078
01085 static void getUnicodeVersion(UVersionInfo info);
01086
01087 protected:
01088
01089
01090
01091
01092
01093 Unicode();
01094 Unicode(const Unicode &other);
01095 ~Unicode();
01096 const Unicode &operator=(const Unicode &other);
01097 };
01098
01099
01100
01101 inline UBool
01102 Unicode::isSingle(UChar c) {
01103 return UTF_IS_SINGLE(c);
01104 }
01105
01106 inline UBool
01107 Unicode::isLead(UChar c) {
01108 return UTF_IS_LEAD(c);
01109 }
01110
01111 inline UBool
01112 Unicode::isTrail(UChar c) {
01113 return UTF_IS_TRAIL(c);
01114 }
01115
01116 inline UBool
01117 Unicode::isSurrogate(UChar32 c) {
01118 return UTF_IS_SURROGATE(c);
01119 }
01120
01121 inline UBool
01122 Unicode::isUnicodeChar(UChar32 c) {
01123 return UTF_IS_UNICODE_CHAR(c);
01124 }
01125
01126 inline UBool
01127 Unicode::isError(UChar32 c) {
01128 return UTF_IS_ERROR(c);
01129 }
01130
01131 inline UBool
01132 Unicode::isValid(UChar32 c) {
01133 return UTF_IS_VALID(c);
01134 }
01135
01136 inline UBool
01137 Unicode::needMultipleUChar(UChar32 c) {
01138 return UTF_NEED_MULTIPLE_UCHAR(c);
01139 }
01140
01141 inline int32_t
01142 Unicode::charLength(UChar32 c) {
01143 return UTF_CHAR_LENGTH(c);
01144 }
01145
01146 inline int32_t
01147 Unicode::arraySize(int32_t size) {
01148 return UTF_ARRAY_SIZE(size);
01149 }
01150
01151
01152 inline UBool
01153 Unicode::isLowerCase(UChar32 ch) {
01154 return u_islower(ch);
01155 }
01156
01157
01158 inline UBool
01159 Unicode::isUpperCase(UChar32 ch) {
01160 return u_isupper(ch);
01161 }
01162
01163
01164 inline UBool
01165 Unicode::isTitleCase(UChar32 ch) {
01166 return u_istitle(ch);
01167 }
01168
01169
01170 inline UBool
01171 Unicode::isDigit(UChar32 ch) {
01172 return u_isdigit(ch);
01173 }
01174
01175
01176 inline UBool
01177 Unicode::isDefined(UChar32 ch) {
01178 return u_isdefined(ch);
01179 }
01180
01181
01182 inline UBool
01183 Unicode::isControl(UChar32 ch) {
01184 return u_iscntrl(ch);
01185 }
01186
01187
01188 inline UBool
01189 Unicode::isPrintable(UChar32 ch) {
01190 return u_isprint(ch);
01191 }
01192
01193
01194 inline UBool
01195 Unicode::isBaseForm(UChar32 ch) {
01196 return u_isbase(ch);
01197 }
01198
01199
01200 inline UBool
01201 Unicode::isLetter(UChar32 ch) {
01202 return u_isalpha(ch);
01203 }
01204
01205
01206 inline UBool
01207 Unicode::isJavaIdentifierStart(UChar32 ch) {
01208 return u_isJavaIDStart(ch);
01209 }
01210
01211
01212
01213 inline UBool
01214 Unicode::isJavaIdentifierPart(UChar32 ch) {
01215 return u_isJavaIDPart(ch);
01216 }
01217
01218
01219 inline UBool
01220 Unicode::isUnicodeIdentifierStart(UChar32 ch) {
01221 return u_isIDStart(ch);
01222 }
01223
01224
01225
01226 inline UBool
01227 Unicode::isUnicodeIdentifierPart(UChar32 ch) {
01228 return u_isIDPart(ch);
01229 }
01230
01231
01232 inline UBool
01233 Unicode::isIdentifierIgnorable(UChar32 ch) {
01234 return u_isIDIgnorable(ch);
01235 }
01236
01237
01238 inline UChar32
01239 Unicode::toLowerCase(UChar32 ch) {
01240 return u_tolower(ch);
01241 }
01242
01243
01244 inline UChar32
01245 Unicode::toUpperCase(UChar32 ch) {
01246 return u_toupper(ch);
01247 }
01248
01249
01250 inline UChar32
01251 Unicode::toTitleCase(UChar32 ch) {
01252 return u_totitle(ch);
01253 }
01254
01255
01256 inline UChar32
01257 Unicode::foldCase(UChar32 ch, uint32_t options) {
01258 return u_foldCase(ch, options);
01259 }
01260
01261
01262 inline UBool
01263 Unicode::isSpaceChar(UChar32 ch) {
01264 return u_isspace(ch);
01265 }
01266
01267
01268 inline UBool
01269 Unicode::isWhitespace(UChar32 ch) {
01270 return u_isWhitespace(ch);
01271 }
01272
01273
01274 inline int8_t
01275 Unicode::getType(UChar32 ch) {
01276 return u_charType(ch);
01277 }
01278
01279 inline uint8_t
01280 Unicode::getCombiningClass(UChar32 c) {
01281 return u_getCombiningClass(c);
01282 }
01283
01284
01285 inline Unicode::EDirectionProperty
01286 Unicode::characterDirection(UChar32 ch) {
01287 return (EDirectionProperty)u_charDirection(ch);
01288 }
01289
01290
01291 inline UBool
01292 Unicode::isMirrored(UChar32 ch) {
01293 return u_isMirrored(ch);
01294 }
01295
01296
01297 inline UChar32
01298 Unicode::charMirror(UChar32 ch) {
01299 return u_charMirror(ch);
01300 }
01301
01302
01303 inline Unicode::EUnicodeScript
01304 Unicode::getScript(UChar32 ch) {
01305 return (EUnicodeScript) u_charScript(ch);
01306 }
01307
01308
01309 inline uint16_t
01310 Unicode::getCellWidth(UChar32 ch) {
01311 return u_charCellWidth(ch);
01312 }
01313
01314 inline UTextOffset
01315 Unicode::getCharName(uint32_t code,
01316 char *buffer, UTextOffset bufferLength,
01317 UCharNameChoice nameChoice) {
01318 UErrorCode errorCode=U_ZERO_ERROR;
01319 UTextOffset length=u_charName(code, nameChoice, buffer, bufferLength, &errorCode);
01320 return U_SUCCESS(errorCode) ? length : 0;
01321 }
01322
01323 inline int32_t
01324 Unicode::digitValue(UChar32 ch) {
01325 return u_charDigitValue(ch);
01326 }
01327
01328 inline int32_t
01329 Unicode::digit(UChar32 ch, int8_t radix) {
01330 return u_digit(ch, radix);
01331 }
01332
01333 inline UChar32
01334 Unicode::forDigit(int32_t digit, int8_t radix) {
01335 return u_forDigit(digit, radix);
01336 }
01337
01338 inline void
01339 Unicode::getUnicodeVersion(UVersionInfo versionArray) {
01340 u_getUnicodeVersion(versionArray);
01341 }
01342 U_NAMESPACE_END
01343
01344 #endif