00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef REGEX_H
00019 #define REGEX_H
00020
00021
00022
00045 #include "unicode/utypes.h"
00046
00047 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00048
00049 #include "unicode/uobject.h"
00050 #include "unicode/unistr.h"
00051 #include "unicode/utext.h"
00052 #include "unicode/parseerr.h"
00053
00054 #include "unicode/uregex.h"
00055
00056
00057
00058 struct UHashtable;
00059
00060 U_NAMESPACE_BEGIN
00061
00062 struct Regex8BitSet;
00063 class RegexCImpl;
00064 class RegexMatcher;
00065 class RegexPattern;
00066 struct REStackFrame;
00067 class RuleBasedBreakIterator;
00068 class UnicodeSet;
00069 class UVector;
00070 class UVector32;
00071 class UVector64;
00072
00073
00085 class U_I18N_API RegexPattern U_FINAL : public UObject {
00086 public:
00087
00095 RegexPattern();
00096
00103 RegexPattern(const RegexPattern &source);
00104
00110 virtual ~RegexPattern();
00111
00120 UBool operator==(const RegexPattern& that) const;
00121
00130 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);}
00131
00137 RegexPattern &operator =(const RegexPattern &source);
00138
00146 virtual RegexPattern *clone() const;
00147
00148
00173 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00174 UParseError &pe,
00175 UErrorCode &status);
00176
00203 static RegexPattern * U_EXPORT2 compile( UText *regex,
00204 UParseError &pe,
00205 UErrorCode &status);
00206
00231 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00232 uint32_t flags,
00233 UParseError &pe,
00234 UErrorCode &status);
00235
00262 static RegexPattern * U_EXPORT2 compile( UText *regex,
00263 uint32_t flags,
00264 UParseError &pe,
00265 UErrorCode &status);
00266
00289 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00290 uint32_t flags,
00291 UErrorCode &status);
00292
00317 static RegexPattern * U_EXPORT2 compile( UText *regex,
00318 uint32_t flags,
00319 UErrorCode &status);
00320
00326 virtual uint32_t flags() const;
00327
00345 virtual RegexMatcher *matcher(const UnicodeString &input,
00346 UErrorCode &status) const;
00347
00348 private:
00361 RegexMatcher *matcher(const char16_t *input,
00362 UErrorCode &status) const;
00363 public:
00364
00365
00377 virtual RegexMatcher *matcher(UErrorCode &status) const;
00378
00379
00394 static UBool U_EXPORT2 matches(const UnicodeString ®ex,
00395 const UnicodeString &input,
00396 UParseError &pe,
00397 UErrorCode &status);
00398
00413 static UBool U_EXPORT2 matches(UText *regex,
00414 UText *input,
00415 UParseError &pe,
00416 UErrorCode &status);
00417
00426 virtual UnicodeString pattern() const;
00427
00428
00439 virtual UText *patternText(UErrorCode &status) const;
00440
00441
00455 virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
00456
00457
00474 virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
00475
00476
00515 virtual int32_t split(const UnicodeString &input,
00516 UnicodeString dest[],
00517 int32_t destCapacity,
00518 UErrorCode &status) const;
00519
00520
00559 virtual int32_t split(UText *input,
00560 UText *dest[],
00561 int32_t destCapacity,
00562 UErrorCode &status) const;
00563
00564
00570 virtual UClassID getDynamicClassID() const;
00571
00577 static UClassID U_EXPORT2 getStaticClassID();
00578
00579 private:
00580
00581
00582
00583 UText *fPattern;
00584 UnicodeString *fPatternString;
00585 uint32_t fFlags;
00586
00587 UVector64 *fCompiledPat;
00588 UnicodeString fLiteralText;
00589
00590
00591 UVector *fSets;
00592 Regex8BitSet *fSets8;
00593
00594
00595 UErrorCode fDeferredStatus;
00596
00597
00598 int32_t fMinMatchLen;
00599
00600
00601
00602
00603 int32_t fFrameSize;
00604
00605
00606 int32_t fDataSize;
00607
00608
00609
00610 UVector32 *fGroupMap;
00611
00612
00613 UnicodeSet **fStaticSets;
00614
00615
00616 Regex8BitSet *fStaticSets8;
00617
00618
00619 int32_t fStartType;
00620 int32_t fInitialStringIdx;
00621 int32_t fInitialStringLen;
00622 UnicodeSet *fInitialChars;
00623 UChar32 fInitialChar;
00624 Regex8BitSet *fInitialChars8;
00625 UBool fNeedsAltInput;
00626
00627 UHashtable *fNamedCaptureMap;
00628
00629 friend class RegexCompile;
00630 friend class RegexMatcher;
00631 friend class RegexCImpl;
00632
00633
00634
00635
00636 void init();
00637 void zap();
00638
00639 void dumpOp(int32_t index) const;
00640
00641 public:
00642 #ifndef U_HIDE_INTERNAL_API
00643
00647 void dumpPattern() const;
00648 #endif
00649 };
00650
00651
00652
00662 class U_I18N_API RegexMatcher U_FINAL : public UObject {
00663 public:
00664
00678 RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status);
00679
00694 RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
00695
00716 RegexMatcher(const UnicodeString ®exp, const UnicodeString &input,
00717 uint32_t flags, UErrorCode &status);
00718
00739 RegexMatcher(UText *regexp, UText *input,
00740 uint32_t flags, UErrorCode &status);
00741
00742 private:
00754 RegexMatcher(const UnicodeString ®exp, const char16_t *input,
00755 uint32_t flags, UErrorCode &status);
00756 public:
00757
00758
00764 virtual ~RegexMatcher();
00765
00766
00773 virtual UBool matches(UErrorCode &status);
00774
00775
00786 virtual UBool matches(int64_t startIndex, UErrorCode &status);
00787
00788
00802 virtual UBool lookingAt(UErrorCode &status);
00803
00804
00818 virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
00819
00820
00833 virtual UBool find();
00834
00835
00850 virtual UBool find(UErrorCode &status);
00851
00861 virtual UBool find(int64_t start, UErrorCode &status);
00862
00863
00873 virtual UnicodeString group(UErrorCode &status) const;
00874
00875
00893 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00894
00900 virtual int32_t groupCount() const;
00901
00902
00917 virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
00918
00939 virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
00940
00948 virtual int32_t start(UErrorCode &status) const;
00949
00957 virtual int64_t start64(UErrorCode &status) const;
00958
00959
00973 virtual int32_t start(int32_t group, UErrorCode &status) const;
00974
00988 virtual int64_t start64(int32_t group, UErrorCode &status) const;
00989
01003 virtual int32_t end(UErrorCode &status) const;
01004
01018 virtual int64_t end64(UErrorCode &status) const;
01019
01020
01038 virtual int32_t end(int32_t group, UErrorCode &status) const;
01039
01057 virtual int64_t end64(int32_t group, UErrorCode &status) const;
01058
01067 virtual RegexMatcher &reset();
01068
01069
01085 virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
01086
01087
01105 virtual RegexMatcher &reset(const UnicodeString &input);
01106
01107
01121 virtual RegexMatcher &reset(UText *input);
01122
01123
01148 virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
01149
01150 private:
01163 RegexMatcher &reset(const char16_t *input);
01164 public:
01165
01173 virtual const UnicodeString &input() const;
01174
01183 virtual UText *inputText() const;
01184
01195 virtual UText *getInput(UText *dest, UErrorCode &status) const;
01196
01197
01216 virtual RegexMatcher ®ion(int64_t start, int64_t limit, UErrorCode &status);
01217
01229 virtual RegexMatcher ®ion(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
01230
01239 virtual int32_t regionStart() const;
01240
01249 virtual int64_t regionStart64() const;
01250
01251
01260 virtual int32_t regionEnd() const;
01261
01270 virtual int64_t regionEnd64() const;
01271
01280 virtual UBool hasTransparentBounds() const;
01281
01300 virtual RegexMatcher &useTransparentBounds(UBool b);
01301
01302
01310 virtual UBool hasAnchoringBounds() const;
01311
01312
01325 virtual RegexMatcher &useAnchoringBounds(UBool b);
01326
01327
01340 virtual UBool hitEnd() const;
01341
01351 virtual UBool requireEnd() const;
01352
01353
01359 virtual const RegexPattern &pattern() const;
01360
01361
01378 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
01379
01380
01401 virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
01402
01403
01424 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
01425
01426
01451 virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
01452
01453
01481 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
01482 const UnicodeString &replacement, UErrorCode &status);
01483
01484
01512 virtual RegexMatcher &appendReplacement(UText *dest,
01513 UText *replacement, UErrorCode &status);
01514
01515
01526 virtual UnicodeString &appendTail(UnicodeString &dest);
01527
01528
01542 virtual UText *appendTail(UText *dest, UErrorCode &status);
01543
01544
01568 virtual int32_t split(const UnicodeString &input,
01569 UnicodeString dest[],
01570 int32_t destCapacity,
01571 UErrorCode &status);
01572
01573
01597 virtual int32_t split(UText *input,
01598 UText *dest[],
01599 int32_t destCapacity,
01600 UErrorCode &status);
01601
01623 virtual void setTimeLimit(int32_t limit, UErrorCode &status);
01624
01631 virtual int32_t getTimeLimit() const;
01632
01654 virtual void setStackLimit(int32_t limit, UErrorCode &status);
01655
01663 virtual int32_t getStackLimit() const;
01664
01665
01679 virtual void setMatchCallback(URegexMatchCallback *callback,
01680 const void *context,
01681 UErrorCode &status);
01682
01683
01694 virtual void getMatchCallback(URegexMatchCallback *&callback,
01695 const void *&context,
01696 UErrorCode &status);
01697
01698
01712 virtual void setFindProgressCallback(URegexFindProgressCallback *callback,
01713 const void *context,
01714 UErrorCode &status);
01715
01716
01727 virtual void getFindProgressCallback(URegexFindProgressCallback *&callback,
01728 const void *&context,
01729 UErrorCode &status);
01730
01731 #ifndef U_HIDE_INTERNAL_API
01732
01737 void setTrace(UBool state);
01738 #endif
01739
01745 static UClassID U_EXPORT2 getStaticClassID();
01746
01752 virtual UClassID getDynamicClassID() const;
01753
01754 private:
01755
01756
01757 RegexMatcher();
01758 RegexMatcher(const RegexPattern *pat);
01759 RegexMatcher(const RegexMatcher &other);
01760 RegexMatcher &operator =(const RegexMatcher &rhs);
01761 void init(UErrorCode &status);
01762 void init2(UText *t, UErrorCode &e);
01763
01764 friend class RegexPattern;
01765 friend class RegexCImpl;
01766 public:
01767 #ifndef U_HIDE_INTERNAL_API
01768
01769 void resetPreserveRegion();
01770 #endif
01771 private:
01772
01773
01774
01775
01776
01777 void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
01778 inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
01779 UBool isWordBoundary(int64_t pos);
01780 UBool isUWordBoundary(int64_t pos);
01781 REStackFrame *resetStack();
01782 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
01783 void IncrementTime(UErrorCode &status);
01784
01785
01786 inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
01787
01788 int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
01789
01790 UBool findUsingChunk(UErrorCode &status);
01791 void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
01792 UBool isChunkWordBoundary(int32_t pos);
01793
01794 const RegexPattern *fPattern;
01795 RegexPattern *fPatternOwned;
01796
01797
01798 const UnicodeString *fInput;
01799 UText *fInputText;
01800 UText *fAltInputText;
01801
01802 int64_t fInputLength;
01803 int32_t fFrameSize;
01804
01805 int64_t fRegionStart;
01806 int64_t fRegionLimit;
01807
01808 int64_t fAnchorStart;
01809 int64_t fAnchorLimit;
01810
01811 int64_t fLookStart;
01812 int64_t fLookLimit;
01813
01814
01815 int64_t fActiveStart;
01816 int64_t fActiveLimit;
01817
01818
01819
01820 UBool fTransparentBounds;
01821 UBool fAnchoringBounds;
01822
01823 UBool fMatch;
01824 int64_t fMatchStart;
01825 int64_t fMatchEnd;
01826
01827
01828 int64_t fLastMatchEnd;
01829
01830 int64_t fAppendPosition;
01831
01832
01833
01834 UBool fHitEnd;
01835 UBool fRequireEnd;
01836
01837
01838 UVector64 *fStack;
01839 REStackFrame *fFrame;
01840
01841
01842
01843 int64_t *fData;
01844 int64_t fSmallData[8];
01845
01846 int32_t fTimeLimit;
01847
01848
01849 int32_t fTime;
01850 int32_t fTickCounter;
01851
01852
01853
01854
01855 int32_t fStackLimit;
01856
01857
01858 URegexMatchCallback *fCallbackFn;
01859
01860 const void *fCallbackContext;
01861
01862 URegexFindProgressCallback *fFindProgressCallbackFn;
01863
01864 const void *fFindProgressCallbackContext;
01865
01866
01867 UBool fInputUniStrMaybeMutable;
01868
01869 UBool fTraceDebug;
01870
01871 UErrorCode fDeferredStatus;
01872
01873
01874 RuleBasedBreakIterator *fWordBreakItr;
01875 };
01876
01877 U_NAMESPACE_END
01878 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
01879 #endif