00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
#ifndef REGEX_H
00017
#define REGEX_H
00018
00019
00020
00040
#include "unicode/utypes.h"
00041
00042
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
00043
00044
#include "unicode/uobject.h"
00045
#include "unicode/unistr.h"
00046
#include "unicode/parseerr.h"
00047
00048
U_NAMESPACE_BEGIN
00049
00050
00051
00052
00053
// Forward declarations of types that this header only refers to through
// pointers, references or friend declarations, so their full definitions
// are not needed here.
class RegexMatcher;
class RegexPattern;
class UVector;
class UVector32;
class UnicodeSet;
struct REStackFrame;
struct Regex8BitSet;
class RuleBasedBreakIterator;
00061
00062
00063
00068
/**
 * Regular-expression option flags.
 *
 * Every enumerator is a distinct single bit, so several of them can be
 * combined with bitwise OR into one integer value.
 *
 * NOTE(review): presumably these are the values accepted by the uint32_t
 * `flags` parameters of RegexPattern::compile() and the RegexMatcher
 * constructors; the per-flag meanings are inferred from the identifiers
 * only -- confirm against the implementation.
 */
enum {
    /** Presumably: match using canonical equivalence. */
    UREGEX_CANON_EQ         = 128,

    /** Presumably: case-insensitive matching. */
    UREGEX_CASE_INSENSITIVE = 2,

    /** Presumably: permit white space and comments inside patterns. */
    UREGEX_COMMENTS         = 4,

    /** Presumably: let '.' also match line terminators. */
    UREGEX_DOTALL           = 32,

    /** Presumably: let '^' and '$' match at line boundaries inside the input. */
    UREGEX_MULTILINE        = 8,

    /** Presumably: use Unicode word-boundary rules for \b. */
    UREGEX_UWORD            = 256
};
00097
00098
00099
00100
00105
/**
 * Debug hook for dumping a pattern.
 *
 * When REGEX_DEBUG is defined, RegexPatternDump is declared as a function
 * (using the U_CAPI / U_EXPORT2 linkage macros) that takes a pointer to a
 * const RegexPattern.  Otherwise it is a function-like macro that expands
 * to nothing, so call sites compile away and the argument expression is
 * never evaluated.
 */
#ifdef REGEX_DEBUG
U_CAPI void U_EXPORT2
    RegexPatternDump(const RegexPattern *pat);
#else
#define RegexPatternDump(pat)
#endif
00111
00112
00113
00125 class U_I18N_API RegexPattern:
public UObject {
00126
public:
00127
00135 RegexPattern();
00136
00142 RegexPattern(
const RegexPattern &source);
00143
00149
virtual ~RegexPattern();
00150
00159
UBool operator==(
const RegexPattern& that)
const;
00160
00169 inline UBool operator!=(
const RegexPattern& that)
const {
return ! operator ==(that);};
00170
00176 RegexPattern &operator =(
const RegexPattern &source);
00177
00185
virtual RegexPattern *clone() const;
00186
00187
00208 static RegexPattern *compile( const
UnicodeString ®ex,
00209
UParseError &pe,
00210
UErrorCode &status);
00211
00232 static RegexPattern *compile( const
UnicodeString ®ex,
00233 uint32_t flags,
00234
UParseError &pe,
00235
UErrorCode &status);
00236
00237
00256 static RegexPattern *compile( const
UnicodeString ®ex,
00257 uint32_t flags,
00258
UErrorCode &status);
00259
00260
00266 virtual uint32_t flags() const;
00267
00280 virtual
RegexMatcher *matcher(const
UnicodeString &input,
00281
UErrorCode &status) const;
00282
00283
00295 virtual
RegexMatcher *matcher(
UErrorCode &status) const;
00296
00297
00312 static
UBool matches(const
UnicodeString ®ex,
00313 const
UnicodeString &input,
00314
UParseError &pe,
00315
UErrorCode &status);
00316
00317
00322 virtual
UnicodeString pattern() const;
00323
00324
00350 virtual int32_t split(const
UnicodeString &input,
00351
UnicodeString dest[],
00352 int32_t destCapacity,
00353
UErrorCode &status) const;
00354
00355
00361 virtual
UClassID getDynamicClassID() const;
00362
00368 static
UClassID getStaticClassID();
00369
00370 private:
00371
00372
00373
00374
UnicodeString fPattern;
00375 uint32_t fFlags;
00376
00377 UVector32 *fCompiledPat;
00378
UnicodeString fLiteralText;
00379
00380
00381 UVector *fSets;
00382 Regex8BitSet *fSets8;
00383
00384
00385
UErrorCode fDeferredStatus;
00386
00387
00388 int32_t fMinMatchLen;
00389
00390
00391
00392
00393 int32_t fFrameSize;
00394
00395
00396 int32_t fDataSize;
00397
00398
00399
00400 UVector32 *fGroupMap;
00401
00402
00403 int32_t fMaxCaptureDigits;
00404
00405
UnicodeSet **fStaticSets;
00406
00407
00408 Regex8BitSet *fStaticSets8;
00409
00410
00411 int32_t fStartType;
00412 int32_t fInitialStringIdx;
00413 int32_t fInitialStringLen;
00414
UnicodeSet *fInitialChars;
00415
UChar32 fInitialChar;
00416 Regex8BitSet *fInitialChars8;
00417
00418 friend class RegexCompile;
00419 friend class
RegexMatcher;
00420
00421
00422
00423
00424
void init();
00425
void zap();
00426 #ifdef REGEX_DEBUG
00427
void dumpOp(int32_t index) const;
00428 friend
void RegexPatternDump(const RegexPattern *);
00429 #endif
00430
00431 };
00432
00433
00434
00444 class U_I18N_API RegexMatcher: public
UObject {
00445
public:
00446
00461 RegexMatcher(
const UnicodeString ®exp, uint32_t flags,
UErrorCode &status);
00462
00478 RegexMatcher(
const UnicodeString ®exp,
const UnicodeString &input,
00479 uint32_t flags,
UErrorCode &status);
00480
00481
00487
virtual ~RegexMatcher();
00488
00489
00496
virtual UBool matches(
UErrorCode &status);
00497
00506
virtual UBool matches(int32_t startIndex,
UErrorCode &status);
00507
00508
00509
00510
00523
virtual UBool lookingAt(
UErrorCode &status);
00524
00525
00539
virtual UBool lookingAt(int32_t startIndex,
UErrorCode &status);
00540
00553
virtual UBool find();
00554
00555
00565
virtual UBool find(int32_t start,
UErrorCode &status);
00566
00567
00577
virtual UnicodeString group(
UErrorCode &status)
const;
00578
00579
00592
virtual UnicodeString group(int32_t groupNum,
UErrorCode &status)
const;
00593
00594
00600
virtual int32_t groupCount()
const;
00601
00602
00610
virtual int32_t start(
UErrorCode &status)
const;
00611
00612
00626
virtual int32_t start(
int group,
UErrorCode &status)
const;
00627
00628
00638
virtual int32_t end(
UErrorCode &status)
const;
00639
00640
00654
virtual int32_t end(
int group,
UErrorCode &status)
const;
00655
00656
00665
virtual RegexMatcher &reset();
00666
00667
00677
virtual RegexMatcher &reset(int32_t index,
UErrorCode &status);
00678
00679
00687
virtual RegexMatcher &reset(
const UnicodeString &input);
00688
00689
00696
virtual const UnicodeString &input()
const;
00697
00698
00704
virtual const RegexPattern &pattern()
const;
00705
00706
00723
virtual UnicodeString replaceAll(
const UnicodeString &replacement,
UErrorCode &status);
00724
00725
00746
virtual UnicodeString replaceFirst(
const UnicodeString &replacement,
UErrorCode &status);
00747
00775
virtual RegexMatcher &appendReplacement(
UnicodeString &dest,
00776
const UnicodeString &replacement,
UErrorCode &status);
00777
00778
00789
virtual UnicodeString &appendTail(
UnicodeString &dest);
00790
00791
00792
00817
virtual int32_t split(
const UnicodeString &input,
00818
UnicodeString dest[],
00819 int32_t destCapacity,
00820
UErrorCode &status);
00821
00822
00823
00829
void setTrace(
UBool state);
00830
00831
00837
static UClassID getStaticClassID();
00838
00844
virtual UClassID getDynamicClassID()
const;
00845
00846
private:
00847
00848
00849 RegexMatcher();
00850 RegexMatcher(
const RegexPattern *pat);
00851 RegexMatcher(
const RegexMatcher &other);
00852 RegexMatcher &operator =(
const RegexMatcher &rhs);
00853
friend class RegexPattern;
00854
00855
00856
00857
00858
00859
00860
void MatchAt(int32_t startIdx,
UErrorCode &status);
00861
inline void backTrack(int32_t &inputIdx, int32_t &patIdx);
00862
UBool isWordBoundary(int32_t pos);
00863
UBool isUWordBoundary(int32_t pos);
00864 REStackFrame *resetStack();
00865
inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx,
00866 int32_t frameSize,
UErrorCode &status);
00867
00868
00869
const RegexPattern *fPattern;
00870 RegexPattern *fPatternOwned;
00871
00872
const UnicodeString *fInput;
00873
00874
UBool fMatch;
00875 int32_t fMatchStart;
00876 int32_t fMatchEnd;
00877 int32_t fLastMatchEnd;
00878
00879 UVector32 *fStack;
00880 REStackFrame *fFrame;
00881
00882
00883
00884 int32_t *fData;
00885 int32_t fSmallData[8];
00886
00887
UBool fTraceDebug;
00888
00889
UErrorCode fDeferredStatus;
00890
00891
00892
RuleBasedBreakIterator *fWordBreakItr;
00893
00894 };
00895
00896
U_NAMESPACE_END
00897
#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
00898
#endif