00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef REGEX_H
00019 #define REGEX_H
00020
00021
00022
00045 #include "unicode/utypes.h"
00046
00047 #if U_SHOW_CPLUSPLUS_API
00048
00049 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00050
00051 #include "unicode/uobject.h"
00052 #include "unicode/unistr.h"
00053 #include "unicode/utext.h"
00054 #include "unicode/parseerr.h"
00055
00056 #include "unicode/uregex.h"
00057
00058
00059
00060 struct UHashtable;    // Forward declaration (global scope); only used by pointer below (RegexPattern::fNamedCaptureMap).
00061
00062 U_NAMESPACE_BEGIN
00063
00064 struct Regex8BitSet;  // Used by pointer in RegexPattern (fSets8, fInitialChars8).
00065 class RegexCImpl;     // Named as a friend of both RegexPattern and RegexMatcher.
00066 class RegexMatcher;   // Defined later in this header.
00067 class RegexPattern;   // Defined later in this header.
00068 struct REStackFrame;  // Used by pointer in RegexMatcher (fFrame, resetStack(), StateSave()).
00069 class BreakIterator;  // Used by pointer in RegexMatcher (fWordBreakItr, fGCBreakItr).
00070 class UnicodeSet;     // Used by pointer in RegexPattern (fInitialChars).
00071 class UVector;        // Used by pointer in RegexPattern (fSets).
00072 class UVector32;      // Used by pointer in RegexPattern (fGroupMap).
00073 class UVector64;      // Used by pointer in RegexPattern (fCompiledPat) and RegexMatcher (fStack).
00074
00075
00087 class U_I18N_API RegexPattern U_FINAL : public UObject {
00088 public:
00089
00097 RegexPattern();
00098
00105 RegexPattern(const RegexPattern &source);
00106
00112 virtual ~RegexPattern();
00113
00122 UBool operator==(const RegexPattern& that) const;
00123
00132 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);}
00133
00139 RegexPattern &operator =(const RegexPattern &source);
00140
00148 virtual RegexPattern *clone() const;
00149
00150
00175 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00176 UParseError &pe,
00177 UErrorCode &status);
00178
00205 static RegexPattern * U_EXPORT2 compile( UText *regex,
00206 UParseError &pe,
00207 UErrorCode &status);
00208
00233 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00234 uint32_t flags,
00235 UParseError &pe,
00236 UErrorCode &status);
00237
00264 static RegexPattern * U_EXPORT2 compile( UText *regex,
00265 uint32_t flags,
00266 UParseError &pe,
00267 UErrorCode &status);
00268
00291 static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex,
00292 uint32_t flags,
00293 UErrorCode &status);
00294
00319 static RegexPattern * U_EXPORT2 compile( UText *regex,
00320 uint32_t flags,
00321 UErrorCode &status);
00322
00328 virtual uint32_t flags() const;
00329
00347 virtual RegexMatcher *matcher(const UnicodeString &input,
00348 UErrorCode &status) const;
00349
00350 private:
00363 RegexMatcher *matcher(const char16_t *input,
00364 UErrorCode &status) const;
00365 public:
00366
00367
00379 virtual RegexMatcher *matcher(UErrorCode &status) const;
00380
00381
00396 static UBool U_EXPORT2 matches(const UnicodeString ®ex,
00397 const UnicodeString &input,
00398 UParseError &pe,
00399 UErrorCode &status);
00400
00415 static UBool U_EXPORT2 matches(UText *regex,
00416 UText *input,
00417 UParseError &pe,
00418 UErrorCode &status);
00419
00428 virtual UnicodeString pattern() const;
00429
00430
00441 virtual UText *patternText(UErrorCode &status) const;
00442
00443
00457 virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
00458
00459
00476 virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
00477
00478
00517 virtual int32_t split(const UnicodeString &input,
00518 UnicodeString dest[],
00519 int32_t destCapacity,
00520 UErrorCode &status) const;
00521
00522
00561 virtual int32_t split(UText *input,
00562 UText *dest[],
00563 int32_t destCapacity,
00564 UErrorCode &status) const;
00565
00566
00572 virtual UClassID getDynamicClassID() const;
00573
00579 static UClassID U_EXPORT2 getStaticClassID();
00580
00581 private:
00582
00583
00584
00585 UText *fPattern;
00586 UnicodeString *fPatternString;
00587 uint32_t fFlags;
00588
00589 UVector64 *fCompiledPat;
00590 UnicodeString fLiteralText;
00591
00592
00593 UVector *fSets;
00594 Regex8BitSet *fSets8;
00595
00596
00597 UErrorCode fDeferredStatus;
00598
00599
00600 int32_t fMinMatchLen;
00601
00602
00603
00604
00605 int32_t fFrameSize;
00606
00607
00608 int32_t fDataSize;
00609
00610
00611
00612 UVector32 *fGroupMap;
00613
00614
00615 int32_t fStartType;
00616 int32_t fInitialStringIdx;
00617 int32_t fInitialStringLen;
00618 UnicodeSet *fInitialChars;
00619 UChar32 fInitialChar;
00620 Regex8BitSet *fInitialChars8;
00621 UBool fNeedsAltInput;
00622
00623 UHashtable *fNamedCaptureMap;
00624
00625 friend class RegexCompile;
00626 friend class RegexMatcher;
00627 friend class RegexCImpl;
00628
00629
00630
00631
00632 void init();
00633 bool initNamedCaptureMap();
00634 void zap();
00635
00636 void dumpOp(int32_t index) const;
00637
00638 public:
00639 #ifndef U_HIDE_INTERNAL_API
00640
00644 void dumpPattern() const;
00645 #endif
00646 };
00647
00648
00649
00659 class U_I18N_API RegexMatcher U_FINAL : public UObject {
00660 public:
00661
00675 RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status);
00676
00691 RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
00692
00713 RegexMatcher(const UnicodeString ®exp, const UnicodeString &input,
00714 uint32_t flags, UErrorCode &status);
00715
00736 RegexMatcher(UText *regexp, UText *input,
00737 uint32_t flags, UErrorCode &status);
00738
00739 private:
00751 RegexMatcher(const UnicodeString ®exp, const char16_t *input,
00752 uint32_t flags, UErrorCode &status);
00753 public:
00754
00755
00761 virtual ~RegexMatcher();
00762
00763
00770 virtual UBool matches(UErrorCode &status);
00771
00772
00783 virtual UBool matches(int64_t startIndex, UErrorCode &status);
00784
00785
00799 virtual UBool lookingAt(UErrorCode &status);
00800
00801
00815 virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
00816
00817
00830 virtual UBool find();
00831
00832
00847 virtual UBool find(UErrorCode &status);
00848
00858 virtual UBool find(int64_t start, UErrorCode &status);
00859
00860
00870 virtual UnicodeString group(UErrorCode &status) const;
00871
00872
00890 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00891
00897 virtual int32_t groupCount() const;
00898
00899
00914 virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
00915
00936 virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
00937
00945 virtual int32_t start(UErrorCode &status) const;
00946
00954 virtual int64_t start64(UErrorCode &status) const;
00955
00956
00970 virtual int32_t start(int32_t group, UErrorCode &status) const;
00971
00985 virtual int64_t start64(int32_t group, UErrorCode &status) const;
00986
01000 virtual int32_t end(UErrorCode &status) const;
01001
01015 virtual int64_t end64(UErrorCode &status) const;
01016
01017
01035 virtual int32_t end(int32_t group, UErrorCode &status) const;
01036
01054 virtual int64_t end64(int32_t group, UErrorCode &status) const;
01055
01064 virtual RegexMatcher &reset();
01065
01066
01082 virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
01083
01084
01102 virtual RegexMatcher &reset(const UnicodeString &input);
01103
01104
01118 virtual RegexMatcher &reset(UText *input);
01119
01120
01145 virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
01146
01147 private:
01160 RegexMatcher &reset(const char16_t *input);
01161 public:
01162
01170 virtual const UnicodeString &input() const;
01171
01180 virtual UText *inputText() const;
01181
01192 virtual UText *getInput(UText *dest, UErrorCode &status) const;
01193
01194
01213 virtual RegexMatcher ®ion(int64_t start, int64_t limit, UErrorCode &status);
01214
01226 virtual RegexMatcher ®ion(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
01227
01236 virtual int32_t regionStart() const;
01237
01246 virtual int64_t regionStart64() const;
01247
01248
01257 virtual int32_t regionEnd() const;
01258
01267 virtual int64_t regionEnd64() const;
01268
01277 virtual UBool hasTransparentBounds() const;
01278
01297 virtual RegexMatcher &useTransparentBounds(UBool b);
01298
01299
01307 virtual UBool hasAnchoringBounds() const;
01308
01309
01322 virtual RegexMatcher &useAnchoringBounds(UBool b);
01323
01324
01337 virtual UBool hitEnd() const;
01338
01348 virtual UBool requireEnd() const;
01349
01350
01356 virtual const RegexPattern &pattern() const;
01357
01358
01375 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
01376
01377
01398 virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
01399
01400
01421 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
01422
01423
01448 virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
01449
01450
01478 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
01479 const UnicodeString &replacement, UErrorCode &status);
01480
01481
01509 virtual RegexMatcher &appendReplacement(UText *dest,
01510 UText *replacement, UErrorCode &status);
01511
01512
01523 virtual UnicodeString &appendTail(UnicodeString &dest);
01524
01525
01539 virtual UText *appendTail(UText *dest, UErrorCode &status);
01540
01541
01565 virtual int32_t split(const UnicodeString &input,
01566 UnicodeString dest[],
01567 int32_t destCapacity,
01568 UErrorCode &status);
01569
01570
01594 virtual int32_t split(UText *input,
01595 UText *dest[],
01596 int32_t destCapacity,
01597 UErrorCode &status);
01598
01620 virtual void setTimeLimit(int32_t limit, UErrorCode &status);
01621
01628 virtual int32_t getTimeLimit() const;
01629
01651 virtual void setStackLimit(int32_t limit, UErrorCode &status);
01652
01660 virtual int32_t getStackLimit() const;
01661
01662
01676 virtual void setMatchCallback(URegexMatchCallback *callback,
01677 const void *context,
01678 UErrorCode &status);
01679
01680
01691 virtual void getMatchCallback(URegexMatchCallback *&callback,
01692 const void *&context,
01693 UErrorCode &status);
01694
01695
01709 virtual void setFindProgressCallback(URegexFindProgressCallback *callback,
01710 const void *context,
01711 UErrorCode &status);
01712
01713
01724 virtual void getFindProgressCallback(URegexFindProgressCallback *&callback,
01725 const void *&context,
01726 UErrorCode &status);
01727
01728 #ifndef U_HIDE_INTERNAL_API
01729
01734 void setTrace(UBool state);
01735 #endif
01736
01742 static UClassID U_EXPORT2 getStaticClassID();
01743
01749 virtual UClassID getDynamicClassID() const;
01750
01751 private:
01752
01753
01754 RegexMatcher();
01755 RegexMatcher(const RegexPattern *pat);
01756 RegexMatcher(const RegexMatcher &other);
01757 RegexMatcher &operator =(const RegexMatcher &rhs);
01758 void init(UErrorCode &status);
01759 void init2(UText *t, UErrorCode &e);
01760
01761 friend class RegexPattern;
01762 friend class RegexCImpl;
01763 public:
01764 #ifndef U_HIDE_INTERNAL_API
01765
01766 void resetPreserveRegion();
01767 #endif
01768 private:
01769
01770
01771
01772
01773
01774 void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
01775 inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
01776 UBool isWordBoundary(int64_t pos);
01777 UBool isUWordBoundary(int64_t pos, UErrorCode &status);
01778
01779 int64_t followingGCBoundary(int64_t pos, UErrorCode &status);
01780 REStackFrame *resetStack();
01781 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
01782 void IncrementTime(UErrorCode &status);
01783
01784
01785 inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
01786
01787 int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
01788
01789 UBool findUsingChunk(UErrorCode &status);
01790 void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
01791 UBool isChunkWordBoundary(int32_t pos);
01792
01793 const RegexPattern *fPattern;
01794 RegexPattern *fPatternOwned;
01795
01796
01797 const UnicodeString *fInput;
01798 UText *fInputText;
01799 UText *fAltInputText;
01800
01801 int64_t fInputLength;
01802 int32_t fFrameSize;
01803
01804 int64_t fRegionStart;
01805 int64_t fRegionLimit;
01806
01807 int64_t fAnchorStart;
01808 int64_t fAnchorLimit;
01809
01810 int64_t fLookStart;
01811 int64_t fLookLimit;
01812
01813
01814 int64_t fActiveStart;
01815 int64_t fActiveLimit;
01816
01817
01818
01819 UBool fTransparentBounds;
01820 UBool fAnchoringBounds;
01821
01822 UBool fMatch;
01823 int64_t fMatchStart;
01824 int64_t fMatchEnd;
01825
01826
01827 int64_t fLastMatchEnd;
01828
01829 int64_t fAppendPosition;
01830
01831
01832
01833 UBool fHitEnd;
01834 UBool fRequireEnd;
01835
01836
01837 UVector64 *fStack;
01838 REStackFrame *fFrame;
01839
01840
01841
01842 int64_t *fData;
01843 int64_t fSmallData[8];
01844
01845 int32_t fTimeLimit;
01846
01847
01848 int32_t fTime;
01849 int32_t fTickCounter;
01850
01851
01852
01853
01854 int32_t fStackLimit;
01855
01856
01857 URegexMatchCallback *fCallbackFn;
01858
01859 const void *fCallbackContext;
01860
01861 URegexFindProgressCallback *fFindProgressCallbackFn;
01862
01863 const void *fFindProgressCallbackContext;
01864
01865
01866 UBool fInputUniStrMaybeMutable;
01867
01868 UBool fTraceDebug;
01869
01870 UErrorCode fDeferredStatus;
01871
01872
01873 BreakIterator *fWordBreakItr;
01874 BreakIterator *fGCBreakItr;
01875 };
01876
01877 U_NAMESPACE_END
01878 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
01879
01880 #endif
01881
01882 #endif