00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef REGEX_H
00017 #define REGEX_H
00018
00019
00020
00045 #include "unicode/utypes.h"
00046
00047 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00048
00049 #include "unicode/uobject.h"
00050 #include "unicode/unistr.h"
00051 #include "unicode/utext.h"
00052 #include "unicode/parseerr.h"
00053
00054 #include "unicode/uregex.h"
00055
00056
00057 // Forward declarations: in the declarations visible in this header these types are used only through pointers, references or friend declarations.
00058 struct UHashtable;  // declared ahead of U_NAMESPACE_BEGIN, i.e. outside the region that macro opens
00059
00060 U_NAMESPACE_BEGIN
00061
00062 struct Regex8BitSet;
00063 class RegexCImpl;
00064 class RegexMatcher;
00065 class RegexPattern;
00066 struct REStackFrame;
00067 class RuleBasedBreakIterator;
00068 class UnicodeSet;
00069 class UVector;
00070 class UVector32;
00071 class UVector64;
00072 /** RegexPattern: U_FINAL class derived from UObject and exported with U_I18N_API. Declares static compile()/matches() entry points, matcher(), split() and pattern accessors. */
00073 /** RegexCompile, RegexMatcher and RegexCImpl are declared friends, so they can reach the private state at the end of the class. */
00085 class U_I18N_API RegexPattern U_FINAL : public UObject {
00086 public:
00087 /** Default constructor; takes no arguments and has no status parameter. */
00095 RegexPattern();
00096 /** Copy constructor. @param source object to copy from. */
00103 RegexPattern(const RegexPattern &source);
00104 /** Virtual destructor. */
00110 virtual ~RegexPattern();
00111 /** Equality comparison with another RegexPattern. @param that right-hand operand. @return UBool comparison result. */
00120 UBool operator==(const RegexPattern& that) const;
00121 /** Inequality; implemented inline as the logical negation of operator==(that). */
00130 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);}
00131 /** Assignment operator. @param source object to assign from. @return RegexPattern reference (NOTE(review): presumably *this - confirm). */
00137 RegexPattern &operator =(const RegexPattern &source);
00138 /** Virtual, const clone. @return RegexPattern pointer. NOTE(review): ownership of the result is not visible in this header - confirm. */
00146 virtual RegexPattern *clone() const;
00147 /** compile(): six static overloads. Pattern text is a UnicodeString or a UText; the argument sets are (regex, pe, status), (regex, flags, pe, status) and (regex, flags, status). */
00148 /** @param regex pattern text. @param pe UParseError taken by non-const reference. @param status UErrorCode taken by non-const reference. @return RegexPattern pointer (NOTE(review): ownership not visible here - confirm). */
00173 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
00174 UParseError &pe,
00175 UErrorCode &status);
00176 /** UText variant of compile(regex, pe, status). */
00203 static RegexPattern * U_EXPORT2 compile( UText *regex,
00204 UParseError &pe,
00205 UErrorCode &status);
00206 /** compile() with an additional uint32_t flags argument. NOTE(review): flag values are presumably those defined in unicode/uregex.h - confirm. */
00231 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
00232 uint32_t flags,
00233 UParseError &pe,
00234 UErrorCode &status);
00235 /** UText variant of compile(regex, flags, pe, status). */
00262 static RegexPattern * U_EXPORT2 compile( UText *regex,
00263 uint32_t flags,
00264 UParseError &pe,
00265 UErrorCode &status);
00266 /** compile() with flags but without a UParseError argument. */
00289 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
00290 uint32_t flags,
00291 UErrorCode &status);
00292 /** UText variant of compile(regex, flags, status). */
00317 static RegexPattern * U_EXPORT2 compile( UText *regex,
00318 uint32_t flags,
00319 UErrorCode &status);
00320 /** @return uint32_t flags value. NOTE(review): presumably the flags supplied to compile() - confirm. */
00326 virtual uint32_t flags() const;
00327 /** matcher(input, status): @param input UnicodeString taken by const reference. @param status UErrorCode reference. @return RegexMatcher pointer (NOTE(review): ownership not visible here - confirm). */
00345 virtual RegexMatcher *matcher(const UnicodeString &input,
00346 UErrorCode &status) const;
00347 // The const UChar* overload below is private and non-virtual. NOTE(review): presumably declared only so a raw UChar* cannot be passed as the UnicodeString input - confirm.
00348 private:
00361 RegexMatcher *matcher(const UChar *input,
00362 UErrorCode &status) const;
00363 public:
00364
00365 /** matcher(status): overload taking no input argument. @return RegexMatcher pointer. */
00377 virtual RegexMatcher *matcher(UErrorCode &status) const;
00378
00379 /** matches(): static overloads taking both the pattern text and the input text (UnicodeString or UText), plus pe and status. @return UBool. */
00394 static UBool U_EXPORT2 matches(const UnicodeString &regex,
00395 const UnicodeString &input,
00396 UParseError &pe,
00397 UErrorCode &status);
00398 /** UText variant of matches(). */
00413 static UBool U_EXPORT2 matches(UText *regex,
00414 UText *input,
00415 UParseError &pe,
00416 UErrorCode &status);
00417 /** @return a UnicodeString by value. NOTE(review): presumably the source text of the pattern - confirm. */
00426 virtual UnicodeString pattern() const;
00427
00428 /** UText-returning counterpart of pattern(). @param status UErrorCode reference. @return UText pointer (NOTE(review): ownership/lifetime not visible here - confirm). */
00439 virtual UText *patternText(UErrorCode &status) const;
00440
00441 /** groupNumberFromName(): @param groupName name as a UnicodeString. @param status UErrorCode reference. @return int32_t group number. */
00455 virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
00456
00457 /** Overload taking the name as const char* plus an explicit int32_t nameLength. NOTE(review): handling of a negative nameLength is not visible here - confirm. */
00474 virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
00475
00476 /** split(): @param input text to split. @param dest caller-supplied array of UnicodeString. @param destCapacity int32_t capacity of dest. @param status UErrorCode reference. @return int32_t (NOTE(review): presumably the number of dest entries used - confirm). */
00515 virtual int32_t split(const UnicodeString &input,
00516 UnicodeString dest[],
00517 int32_t destCapacity,
00518 UErrorCode &status) const;
00519
00520 /** UText variant of split(); dest is an array of UText pointers. */
00559 virtual int32_t split(UText *input,
00560 UText *dest[],
00561 int32_t destCapacity,
00562 UErrorCode &status) const;
00563
00564 /** Class-ID accessors: a virtual per-object getter and a static per-class getter, both returning UClassID. */
00570 virtual UClassID getDynamicClassID() const;
00571
00577 static UClassID U_EXPORT2 getStaticClassID();
00578
00579 private:
00580 // Private state. NOTE(review): the role notes below are inferred from member names and types only; confirm against the implementation file.
00581
00582 // Pattern held both as a UText pointer and as a UnicodeString pointer, plus a uint32_t flags word.
00583 UText *fPattern;
00584 UnicodeString *fPatternString;
00585 uint32_t fFlags;
00586
00587 UVector64 *fCompiledPat;
00588 UnicodeString fLiteralText;
00589
00590 // Two parallel set containers: a UVector and a Regex8BitSet pointer.
00591 UVector *fSets;
00592 Regex8BitSet *fSets8;
00593
00594 // UErrorCode stored as a member; the constructors above take no status argument.
00595 UErrorCode fDeferredStatus;
00596
00597
00598 int32_t fMinMatchLen;
00599
00600
00601
00602
00603 int32_t fFrameSize;
00604
00605
00606 int32_t fDataSize;
00607
00608
00609
00610 UVector32 *fGroupMap;
00611
00612
00613 UnicodeSet **fStaticSets;
00614
00615
00616 Regex8BitSet *fStaticSets8;
00617
00618 // fStartType / fInitial*: NOTE(review): presumably describe how a match may begin - confirm.
00619 int32_t fStartType;
00620 int32_t fInitialStringIdx;
00621 int32_t fInitialStringLen;
00622 UnicodeSet *fInitialChars;
00623 UChar32 fInitialChar;
00624 Regex8BitSet *fInitialChars8;
00625 UBool fNeedsAltInput;
00626
00627 UHashtable *fNamedCaptureMap;
00628
00629 friend class RegexCompile;
00630 friend class RegexMatcher;
00631 friend class RegexCImpl;
00632
00633
00634
00635 // Private helpers; all non-virtual.
00636 void init();
00637 void zap();
00638
00639 void dumpOp(int32_t index) const;
00640
00641 public:
00642 #ifndef U_HIDE_INTERNAL_API
00643 /** Public but only declared when U_HIDE_INTERNAL_API is not defined; const, non-virtual, no arguments. */
00647 void dumpPattern() const;
00648 #endif
00649 };
00650
00651
00652
00662 class U_I18N_API RegexMatcher U_FINAL : public UObject {
00663 public:
00664
00679 RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status);
00680
00696 RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
00697
00719 RegexMatcher(const UnicodeString ®exp, const UnicodeString &input,
00720 uint32_t flags, UErrorCode &status);
00721
00743 RegexMatcher(UText *regexp, UText *input,
00744 uint32_t flags, UErrorCode &status);
00745
00746 private:
00759 RegexMatcher(const UnicodeString ®exp, const UChar *input,
00760 uint32_t flags, UErrorCode &status);
00761 public:
00762
00763
00769 virtual ~RegexMatcher();
00770
00771
00778 virtual UBool matches(UErrorCode &status);
00779
00780
00791 virtual UBool matches(int64_t startIndex, UErrorCode &status);
00792
00793
00807 virtual UBool lookingAt(UErrorCode &status);
00808
00809
00823 virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
00824
00825
00838 virtual UBool find();
00839
00840
00854 virtual UBool find(UErrorCode &status);
00855
00865 virtual UBool find(int64_t start, UErrorCode &status);
00866
00867
00877 virtual UnicodeString group(UErrorCode &status) const;
00878
00879
00897 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00898
00904 virtual int32_t groupCount() const;
00905
00906
00921 virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
00922
00943 virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
00944
00952 virtual int32_t start(UErrorCode &status) const;
00953
00961 virtual int64_t start64(UErrorCode &status) const;
00962
00963
00977 virtual int32_t start(int32_t group, UErrorCode &status) const;
00978
00992 virtual int64_t start64(int32_t group, UErrorCode &status) const;
00993
01007 virtual int32_t end(UErrorCode &status) const;
01008
01022 virtual int64_t end64(UErrorCode &status) const;
01023
01024
01042 virtual int32_t end(int32_t group, UErrorCode &status) const;
01043
01061 virtual int64_t end64(int32_t group, UErrorCode &status) const;
01062
01071 virtual RegexMatcher &reset();
01072
01073
01089 virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
01090
01091
01109 virtual RegexMatcher &reset(const UnicodeString &input);
01110
01111
01125 virtual RegexMatcher &reset(UText *input);
01126
01127
01152 virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
01153
01154 private:
01167 RegexMatcher &reset(const UChar *input);
01168 public:
01169
01177 virtual const UnicodeString &input() const;
01178
01187 virtual UText *inputText() const;
01188
01199 virtual UText *getInput(UText *dest, UErrorCode &status) const;
01200
01201
01220 virtual RegexMatcher ®ion(int64_t start, int64_t limit, UErrorCode &status);
01221
01233 virtual RegexMatcher ®ion(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
01234
01243 virtual int32_t regionStart() const;
01244
01253 virtual int64_t regionStart64() const;
01254
01255
01264 virtual int32_t regionEnd() const;
01265
01274 virtual int64_t regionEnd64() const;
01275
01284 virtual UBool hasTransparentBounds() const;
01285
01304 virtual RegexMatcher &useTransparentBounds(UBool b);
01305
01306
01314 virtual UBool hasAnchoringBounds() const;
01315
01316
01329 virtual RegexMatcher &useAnchoringBounds(UBool b);
01330
01331
01344 virtual UBool hitEnd() const;
01345
01355 virtual UBool requireEnd() const;
01356
01357
01363 virtual const RegexPattern &pattern() const;
01364
01365
01382 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
01383
01384
01405 virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
01406
01407
01428 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
01429
01430
01455 virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
01456
01457
01485 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
01486 const UnicodeString &replacement, UErrorCode &status);
01487
01488
01516 virtual RegexMatcher &appendReplacement(UText *dest,
01517 UText *replacement, UErrorCode &status);
01518
01519
01530 virtual UnicodeString &appendTail(UnicodeString &dest);
01531
01532
01546 virtual UText *appendTail(UText *dest, UErrorCode &status);
01547
01548
01572 virtual int32_t split(const UnicodeString &input,
01573 UnicodeString dest[],
01574 int32_t destCapacity,
01575 UErrorCode &status);
01576
01577
01601 virtual int32_t split(UText *input,
01602 UText *dest[],
01603 int32_t destCapacity,
01604 UErrorCode &status);
01605
01627 virtual void setTimeLimit(int32_t limit, UErrorCode &status);
01628
01635 virtual int32_t getTimeLimit() const;
01636
01658 virtual void setStackLimit(int32_t limit, UErrorCode &status);
01659
01667 virtual int32_t getStackLimit() const;
01668
01669
01683 virtual void setMatchCallback(URegexMatchCallback *callback,
01684 const void *context,
01685 UErrorCode &status);
01686
01687
01698 virtual void getMatchCallback(URegexMatchCallback *&callback,
01699 const void *&context,
01700 UErrorCode &status);
01701
01702
01716 virtual void setFindProgressCallback(URegexFindProgressCallback *callback,
01717 const void *context,
01718 UErrorCode &status);
01719
01720
01731 virtual void getFindProgressCallback(URegexFindProgressCallback *&callback,
01732 const void *&context,
01733 UErrorCode &status);
01734
01735 #ifndef U_HIDE_INTERNAL_API
01736
01741 void setTrace(UBool state);
01742 #endif
01743
01749 static UClassID U_EXPORT2 getStaticClassID();
01750
01756 virtual UClassID getDynamicClassID() const;
01757
01758 private:
01759
01760
01761 RegexMatcher();
01762 RegexMatcher(const RegexPattern *pat);
01763 RegexMatcher(const RegexMatcher &other);
01764 RegexMatcher &operator =(const RegexMatcher &rhs);
01765 void init(UErrorCode &status);
01766 void init2(UText *t, UErrorCode &e);
01767
01768 friend class RegexPattern;
01769 friend class RegexCImpl;
01770 public:
01771 #ifndef U_HIDE_INTERNAL_API
01772
01773 void resetPreserveRegion();
01774 #endif
01775 private:
01776
01777
01778
01779
01780
01781 void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
01782 inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
01783 UBool isWordBoundary(int64_t pos);
01784 UBool isUWordBoundary(int64_t pos);
01785 REStackFrame *resetStack();
01786 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
01787 void IncrementTime(UErrorCode &status);
01788
01789
01790 inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
01791
01792 int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
01793
01794 UBool findUsingChunk(UErrorCode &status);
01795 void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
01796 UBool isChunkWordBoundary(int32_t pos);
01797
01798 const RegexPattern *fPattern;
01799 RegexPattern *fPatternOwned;
01800
01801
01802 const UnicodeString *fInput;
01803 UText *fInputText;
01804 UText *fAltInputText;
01805
01806 int64_t fInputLength;
01807 int32_t fFrameSize;
01808
01809 int64_t fRegionStart;
01810 int64_t fRegionLimit;
01811
01812 int64_t fAnchorStart;
01813 int64_t fAnchorLimit;
01814
01815 int64_t fLookStart;
01816 int64_t fLookLimit;
01817
01818
01819 int64_t fActiveStart;
01820 int64_t fActiveLimit;
01821
01822
01823
01824 UBool fTransparentBounds;
01825 UBool fAnchoringBounds;
01826
01827 UBool fMatch;
01828 int64_t fMatchStart;
01829 int64_t fMatchEnd;
01830
01831
01832 int64_t fLastMatchEnd;
01833
01834 int64_t fAppendPosition;
01835
01836
01837
01838 UBool fHitEnd;
01839 UBool fRequireEnd;
01840
01841
01842 UVector64 *fStack;
01843 REStackFrame *fFrame;
01844
01845
01846
01847 int64_t *fData;
01848 int64_t fSmallData[8];
01849
01850 int32_t fTimeLimit;
01851
01852
01853 int32_t fTime;
01854 int32_t fTickCounter;
01855
01856
01857
01858
01859 int32_t fStackLimit;
01860
01861
01862 URegexMatchCallback *fCallbackFn;
01863
01864 const void *fCallbackContext;
01865
01866 URegexFindProgressCallback *fFindProgressCallbackFn;
01867
01868 const void *fFindProgressCallbackContext;
01869
01870
01871 UBool fInputUniStrMaybeMutable;
01872
01873 UBool fTraceDebug;
01874
01875 UErrorCode fDeferredStatus;
01876
01877
01878 RuleBasedBreakIterator *fWordBreakItr;
01879 };
01880
01881 U_NAMESPACE_END
01882 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
01883 #endif