Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

uniset.h

00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1999-2003, International Business Machines Corporation and others. All Rights Reserved. 00004 ********************************************************************** 00005 * Date Name Description 00006 * 10/20/99 alan Creation. 00007 ********************************************************************** 00008 */ 00009 00010 #ifndef UNICODESET_H 00011 #define UNICODESET_H 00012 00013 #include "unicode/unifilt.h" 00014 #include "unicode/utypes.h" 00015 #include "unicode/unistr.h" 00016 #include "unicode/uchar.h" 00017 #include "unicode/uset.h" 00018 00019 U_NAMESPACE_BEGIN 00020 00021 class ParsePosition; 00022 class SymbolTable; 00023 class UVector; 00024 class CaseEquivClass; 00025 class RuleCharacterIterator; 00026 00258 class U_COMMON_API UnicodeSet : public UnicodeFilter { 00259 00260 int32_t len; // length of list used; 0 <= len <= capacity 00261 int32_t capacity; // capacity of list 00262 int32_t bufferCapacity; // capacity of buffer 00263 UChar32* list; // MUST be terminated with HIGH 00264 UChar32* buffer; // internal buffer, may be NULL 00265 00266 UVector* strings; // maintained in sorted order 00267 00277 UnicodeString pat; 00278 00279 public: 00280 00285 #ifdef U_CYGWIN 00286 static U_COMMON_API const UChar32 MIN_VALUE; 00287 #else 00288 static const UChar32 MIN_VALUE; 00289 #endif 00290 00295 #ifdef U_CYGWIN 00296 static U_COMMON_API const UChar32 MAX_VALUE; 00297 #else 00298 static const UChar32 MAX_VALUE; 00299 #endif 00300 00301 //---------------------------------------------------------------- 00302 // Constructors &c 00303 //---------------------------------------------------------------- 00304 00305 public: 00306 00311 UnicodeSet(); 00312 00321 UnicodeSet(UChar32 start, UChar32 end); 00322 00331 UnicodeSet(const UnicodeString& pattern, 00332 UErrorCode& status); 00333 00346 UnicodeSet(const UnicodeString& pattern, 00347 uint32_t options, 00348 const SymbolTable* symbols, 00349 UErrorCode& status); 00350 00364 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, 00365 uint32_t options, 00366 const SymbolTable* symbols, 00367 UErrorCode& status); 00368 00369 #ifdef U_USE_UNICODESET_DEPRECATES 00370 00376 UnicodeSet(int8_t category, UErrorCode& status); 00377 #endif 00378 00383 UnicodeSet(const UnicodeSet& o); 00384 00389 virtual ~UnicodeSet(); 00390 00395 UnicodeSet& operator=(const UnicodeSet& o); 00396 00408 virtual UBool operator==(const UnicodeSet& o) const; 00409 00415 UBool operator!=(const UnicodeSet& o) const; 00416 00423 virtual UnicodeFunctor* clone() const; 00424 00432 virtual int32_t hashCode(void) const; 00433 00434 //---------------------------------------------------------------- 00435 // Public API 00436 //---------------------------------------------------------------- 00437 00447 UnicodeSet& set(UChar32 start, UChar32 end); 00448 00454 static UBool resemblesPattern(const UnicodeString& pattern, 00455 int32_t pos); 00456 00468 virtual UnicodeSet& applyPattern(const UnicodeString& pattern, 00469 UErrorCode& status); 00470 00486 UnicodeSet& applyPattern(const UnicodeString& pattern, 00487 uint32_t options, 00488 const SymbolTable* symbols, 00489 UErrorCode& status); 00490 00521 UnicodeSet& applyPattern(const UnicodeString& pattern, 00522 ParsePosition& pos, 00523 uint32_t options, 00524 const SymbolTable* symbols, 00525 UErrorCode& status); 00526 00539 virtual UnicodeString& toPattern(UnicodeString& result, 00540 UBool escapeUnprintable = FALSE) const; 00541 00563 UnicodeSet& applyIntPropertyValue(UProperty prop, 00564 int32_t value, 00565 UErrorCode& ec); 00566 00594 UnicodeSet& applyPropertyAlias(const UnicodeString& prop, 00595 const UnicodeString& value, 00596 UErrorCode& ec); 00597 00605 virtual int32_t size(void) const; 00606 00613 virtual UBool isEmpty(void) const; 00614 00621 virtual UBool contains(UChar32 c) const; 00622 00631 virtual UBool contains(UChar32 start, UChar32 end) const; 00632 00640 UBool contains(const UnicodeString& s) const; 00641 00649 virtual UBool containsAll(const UnicodeSet& c) const; 00650 00658 UBool containsAll(const UnicodeString& s) const; 00659 00668 UBool containsNone(UChar32 start, UChar32 end) const; 00669 00677 UBool containsNone(const UnicodeSet& c) const; 00678 00686 UBool containsNone(const UnicodeString& s) const; 00687 00696 inline UBool containsSome(UChar32 start, UChar32 end) const; 00697 00705 inline UBool containsSome(const UnicodeSet& s) const; 00706 00714 inline UBool containsSome(const UnicodeString& s) const; 00715 00720 UMatchDegree matches(const Replaceable& text, 00721 int32_t& offset, 00722 int32_t limit, 00723 UBool incremental); 00724 00725 private: 00747 static int32_t matchRest(const Replaceable& text, 00748 int32_t start, int32_t limit, 00749 const UnicodeString& s); 00750 00760 int32_t findCodePoint(UChar32 c) const; 00761 00762 public: 00763 00771 void addMatchSetTo(UnicodeSet& toUnionTo) const; 00772 00781 int32_t indexOf(UChar32 c) const; 00782 00792 UChar32 charAt(int32_t index) const; 00793 00807 virtual UnicodeSet& add(UChar32 start, UChar32 end); 00808 00815 UnicodeSet& add(UChar32 c); 00816 00827 UnicodeSet& add(const UnicodeString& s); 00828 00829 private: 00835 static int32_t getSingleCP(const UnicodeString& s); 00836 00837 void _add(const UnicodeString& s); 00838 00839 public: 00847 UnicodeSet& addAll(const UnicodeString& s); 00848 00856 UnicodeSet& retainAll(const UnicodeString& s); 00857 00865 UnicodeSet& complementAll(const UnicodeString& s); 00866 00874 UnicodeSet& removeAll(const UnicodeString& s); 00875 00884 static UnicodeSet* createFrom(const UnicodeString& s); 00885 00886 00894 static UnicodeSet* createFromAll(const UnicodeString& s); 00895 00908 virtual UnicodeSet& retain(UChar32 start, UChar32 end); 00909 00910 00915 UnicodeSet& retain(UChar32 c); 00916 00929 virtual UnicodeSet& remove(UChar32 start, UChar32 end); 00930 00937 UnicodeSet& remove(UChar32 c); 00938 00947 UnicodeSet& remove(const UnicodeString& s); 00948 00955 virtual UnicodeSet& complement(void); 00956 00970 virtual UnicodeSet& complement(UChar32 start, UChar32 end); 00971 00978 UnicodeSet& complement(UChar32 c); 00979 00989 UnicodeSet& complement(const UnicodeString& s); 00990 01002 virtual UnicodeSet& addAll(const UnicodeSet& c); 01003 01014 virtual UnicodeSet& retainAll(const UnicodeSet& c); 01015 01026 virtual UnicodeSet& removeAll(const UnicodeSet& c); 01027 01037 virtual UnicodeSet& complementAll(const UnicodeSet& c); 01038 01044 virtual UnicodeSet& clear(void); 01045 01069 UnicodeSet& closeOver(int32_t attribute); 01070 01078 virtual int32_t getRangeCount(void) const; 01079 01087 virtual UChar32 getRangeStart(int32_t index) const; 01088 01096 virtual UChar32 getRangeEnd(int32_t index) const; 01097 01146 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const; 01147 01153 virtual UnicodeSet& compact(); 01154 01166 static UClassID getStaticClassID(void); 01167 01176 virtual UClassID getDynamicClassID(void) const; 01177 01178 private: 01179 01180 // Private API for the USet API 01181 01182 friend class USetAccess; 01183 01184 int32_t getStringCount() const; 01185 01186 const UnicodeString* getString(int32_t index) const; 01187 01188 //---------------------------------------------------------------- 01189 // RuleBasedTransliterator support 01190 //---------------------------------------------------------------- 01191 01192 private: 01193 01199 virtual UBool matchesIndexValue(uint8_t v) const; 01200 01201 private: 01202 01203 //---------------------------------------------------------------- 01204 // Implementation: Pattern parsing 01205 //---------------------------------------------------------------- 01206 01207 void applyPattern(RuleCharacterIterator& chars, 01208 const SymbolTable* symbols, 01209 UnicodeString& rebuiltPat, 01210 uint32_t options, 01211 UErrorCode& ec); 01212 01213 //---------------------------------------------------------------- 01214 // Implementation: Utility methods 01215 //---------------------------------------------------------------- 01216 01217 void ensureCapacity(int32_t newLen); 01218 01219 void ensureBufferCapacity(int32_t newLen); 01220 01221 void swapBuffers(void); 01222 01223 UBool allocateStrings(); 01224 01225 UnicodeString& _toPattern(UnicodeString& result, 01226 UBool escapeUnprintable) const; 01227 01228 UnicodeString& _generatePattern(UnicodeString& result, 01229 UBool escapeUnprintable) const; 01230 01231 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable); 01232 01233 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable); 01234 01235 //---------------------------------------------------------------- 01236 // Implementation: Fundamental operators 01237 //---------------------------------------------------------------- 01238 01239 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity); 01240 01241 void add(const UChar32* other, int32_t otherLen, int8_t polarity); 01242 01243 void retain(const UChar32* other, int32_t otherLen, int8_t polarity); 01244 01250 static UBool resemblesPropertyPattern(const UnicodeString& pattern, 01251 int32_t pos); 01252 01253 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars, 01254 int32_t iterOpts); 01255 01294 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern, 01295 ParsePosition& ppos, 01296 UErrorCode &ec); 01297 01298 void applyPropertyPattern(RuleCharacterIterator& chars, 01299 UnicodeString& rebuiltPat, 01300 UErrorCode& ec); 01301 01306 typedef UBool (*Filter)(UChar32 codePoint, void* context); 01307 01316 void applyFilter(Filter filter, 01317 void* context, 01318 UErrorCode &status); 01319 01324 static const UnicodeSet* getInclusions(UErrorCode &errorCode); 01325 01326 friend class UnicodeSetIterator; 01327 01328 //---------------------------------------------------------------- 01329 // Implementation: closeOver 01330 //---------------------------------------------------------------- 01331 01332 void caseCloseOne(const UnicodeString& folded); 01333 01334 void caseCloseOne(const CaseEquivClass& c); 01335 01336 void caseCloseOne(UChar folded); 01337 01338 static const CaseEquivClass* getCaseMapOf(const UnicodeString& folded); 01339 01340 static const CaseEquivClass* getCaseMapOf(UChar folded); 01341 }; 01342 01343 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { 01344 return !operator==(o); 01345 } 01346 01347 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { 01348 return !containsNone(start, end); 01349 } 01350 01351 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const { 01352 return !containsNone(s); 01353 } 01354 01355 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { 01356 return !containsNone(s); 01357 } 01358 01359 U_NAMESPACE_END 01360 01361 #endif

Generated on Wed Sep 15 17:18:09 2004 for ICU 2.8 by doxygen 1.3.8