00001
00002
00003
00004
00005
00006
00007
00008
00009
#ifndef NORMLZR_H
00010
#define NORMLZR_H
00011
00012
#include "unicode/utypes.h"
00013
00014
#if !UCONFIG_NO_NORMALIZATION
00015
00016
#include "unicode/uobject.h"
00017
#include "unicode/unistr.h"
00018
#include "unicode/chariter.h"
00019
#include "unicode/unorm.h"
00020
00021
// Forward-declare UCharIterator and give it a typedef name, so the type can
// be named without its full definition. Within this header it is used only
// through a pointer (the private `text` member of Normalizer).
struct UCharIterator;
00022
typedef struct UCharIterator UCharIterator;
00024
U_NAMESPACE_BEGIN
00115 class U_COMMON_API Normalizer :
public UObject {
00116
public:
00122
enum {
00123 DONE=0xffff
00124 };
00125
00126
00127
00138 Normalizer(
const UnicodeString& str,
UNormalizationMode mode);
00139
00151 Normalizer(
const UChar* str, int32_t length,
UNormalizationMode mode);
00152
00163 Normalizer(
const CharacterIterator& iter,
UNormalizationMode mode);
00164
00170 Normalizer(
const Normalizer& copy);
00171
00176 ~Normalizer();
00177
00178
00179
00180
00181
00182
00197
static void normalize(
const UnicodeString& source,
00198
UNormalizationMode mode, int32_t options,
00199
UnicodeString& result,
00200
UErrorCode &status);
00201
00219
static void compose(
const UnicodeString& source,
00220
UBool compat, int32_t options,
00221
UnicodeString& result,
00222
UErrorCode &status);
00223
00241
static void decompose(
const UnicodeString& source,
00242
UBool compat, int32_t options,
00243
UnicodeString& result,
00244
UErrorCode &status);
00245
00266
static inline UNormalizationCheckResult
00267 quickCheck(
const UnicodeString &source,
UNormalizationMode mode,
UErrorCode &status);
00268
00282
static inline UNormalizationCheckResult
00283 quickCheck(
const UnicodeString &source,
UNormalizationMode mode, int32_t options,
UErrorCode &status);
00284
00305
static inline UBool
00306 isNormalized(
const UnicodeString &src,
UNormalizationMode mode,
UErrorCode &errorCode);
00307
00323
static inline UBool
00324 isNormalized(
const UnicodeString &src,
UNormalizationMode mode, int32_t options,
UErrorCode &errorCode);
00325
00355
static UnicodeString &
00356 concatenate(
UnicodeString &left,
UnicodeString &right,
00357
UnicodeString &result,
00358
UNormalizationMode mode, int32_t options,
00359
UErrorCode &errorCode);
00360
00425
static inline int32_t
00426 compare(
const UnicodeString &s1,
const UnicodeString &s2,
00427 uint32_t options,
00428
UErrorCode &errorCode);
00429
00430
00431
00432
00433
00442
UChar32 current(
void);
00443
00452
UChar32 first(
void);
00453
00462
UChar32 last(
void);
00463
00478
UChar32 next(
void);
00479
00494
UChar32 previous(
void);
00495
00505
void setIndexOnly(int32_t index);
00506
00512
void reset(
void);
00513
00528 int32_t getIndex(
void)
const;
00529
00538 int32_t startIndex(
void)
const;
00539
00550 int32_t endIndex(
void)
const;
00551
00560
UBool operator==(
const Normalizer& that)
const;
00561
00570
inline UBool operator!=(
const Normalizer& that)
const;
00571
00578 Normalizer* clone(
void)
const;
00579
00586 int32_t hashCode(
void)
const;
00587
00588
00589
00590
00591
00607
void setMode(
UNormalizationMode newMode);
00608
00619
UNormalizationMode getUMode(
void)
const;
00620
00637
void setOption(int32_t option,
00638
UBool value);
00639
00650
UBool getOption(int32_t option)
const;
00651
00660
void setText(
const UnicodeString& newText,
00661
UErrorCode &status);
00662
00671
void setText(
const CharacterIterator& newText,
00672
UErrorCode &status);
00673
00683
void setText(
const UChar* newText,
00684 int32_t length,
00685
UErrorCode &status);
00692
void getText(
UnicodeString& result);
00693
00699
static UClassID getStaticClassID();
00700
00706
virtual UClassID getDynamicClassID()
const;
00707
00708
private:
00709
00710
00711
00712
00713 Normalizer();
00714 Normalizer &operator=(
const Normalizer &that);
00715
00716
00717
00718
UBool nextNormalize();
00719
UBool previousNormalize();
00720
00721
void init(
CharacterIterator *iter);
00722
void clearBuffer(
void);
00723
00724
00725
00726
00727
00728
UNormalizationMode fUMode;
00729 int32_t fOptions;
00730
00731
00732 UCharIterator *text;
00733
00734
00735
00736 int32_t currentIndex, nextIndex;
00737
00738
00739
UnicodeString buffer;
00740 int32_t bufferPos;
00741
00742 };
00743
00744
00745
00746
00747
00748
/**
 * Inequality operator: the logical negation of operator==.
 *
 * @param other the Normalizer to compare this object with
 * @return the negated result of operator==(other)
 */
inline UBool
Normalizer::operator!=(const Normalizer& other) const
{
    const UBool same = this->operator==(other);
    return !same;
}
00751
00752
/**
 * Inline wrapper that forwards a UnicodeString to unorm_quickCheck().
 *
 * @param source string whose buffer and length are passed to the C function
 * @param mode   normalization mode, forwarded unchanged
 * @param status in/out error code; if it already indicates a failure on
 *               entry, unorm_quickCheck() is not called
 * @return UNORM_MAYBE if status is already a failure, otherwise the value
 *         returned by unorm_quickCheck()
 */
inline UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode,
                       UErrorCode &status) {
    // Default answer, used when the incoming status blocks the call.
    UNormalizationCheckResult result = UNORM_MAYBE;

    if(!U_FAILURE(status)) {
        result = unorm_quickCheck(source.getBuffer(), source.length(),
                                  mode, &status);
    }

    return result;
}
00763
00764
/**
 * Inline wrapper that forwards a UnicodeString to
 * unorm_quickCheckWithOptions().
 *
 * @param source  string whose buffer and length are passed to the C function
 * @param mode    normalization mode, forwarded unchanged
 * @param options options word, forwarded unchanged
 * @param status  in/out error code; if it already indicates a failure on
 *                entry, unorm_quickCheckWithOptions() is not called
 * @return UNORM_MAYBE if status is already a failure, otherwise the value
 *         returned by unorm_quickCheckWithOptions()
 */
inline UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode, int32_t options,
                       UErrorCode &status) {
    // Default answer, used when the incoming status blocks the call.
    UNormalizationCheckResult result = UNORM_MAYBE;

    if(!U_FAILURE(status)) {
        result = unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
                                             mode, options, &status);
    }

    return result;
}
00775
00776
/**
 * Inline wrapper that forwards a UnicodeString to unorm_isNormalized().
 *
 * @param source string whose buffer and length are passed to the C function
 * @param mode   normalization mode, forwarded unchanged
 * @param status in/out error code; if it already indicates a failure on
 *               entry, unorm_isNormalized() is not called
 * @return FALSE if status is already a failure, otherwise the value
 *         returned by unorm_isNormalized()
 */
inline UBool
Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode,
                         UErrorCode &status) {
    // Default answer, used when the incoming status blocks the call.
    UBool normalized = FALSE;

    if(!U_FAILURE(status)) {
        normalized = unorm_isNormalized(source.getBuffer(), source.length(),
                                        mode, &status);
    }

    return normalized;
}
00787
00788
/**
 * Inline wrapper that forwards a UnicodeString to
 * unorm_isNormalizedWithOptions().
 *
 * @param source  string whose buffer and length are passed to the C function
 * @param mode    normalization mode, forwarded unchanged
 * @param options options word, forwarded unchanged
 * @param status  in/out error code; if it already indicates a failure on
 *                entry, unorm_isNormalizedWithOptions() is not called
 * @return FALSE if status is already a failure, otherwise the value
 *         returned by unorm_isNormalizedWithOptions()
 */
inline UBool
Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode, int32_t options,
                         UErrorCode &status) {
    // Default answer, used when the incoming status blocks the call.
    UBool normalized = FALSE;

    if(!U_FAILURE(status)) {
        normalized = unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
                                                   mode, options, &status);
    }

    return normalized;
}
00799
00800
/**
 * Inline wrapper that forwards two UnicodeStrings to unorm_compare().
 *
 * @param s1        first string; its buffer and length are passed through
 * @param s2        second string; its buffer and length are passed through
 * @param options   options word, forwarded unchanged
 * @param errorCode in/out error code, handed to unorm_compare() by address
 * @return the value returned by unorm_compare()
 */
inline int32_t
Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
                    uint32_t options,
                    UErrorCode &errorCode) {
    // No U_FAILURE pre-check is made here; errorCode goes to unorm_compare()
    // as-is, so any validation of it is left to that function.
    const int32_t order = unorm_compare(s1.getBuffer(), s1.length(),
                                        s2.getBuffer(), s2.length(),
                                        options,
                                        &errorCode);
    return order;
}
00810
00811
U_NAMESPACE_END
00812
00813
#endif
00814
00815
#endif // NORMLZR_H