00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef RBBI_H
00017 #define RBBI_H
00018
00019 #include "unicode/utypes.h"
00020
00026 #if !UCONFIG_NO_BREAK_ITERATION
00027
00028 #include "unicode/brkiter.h"
00029 #include "unicode/udata.h"
00030 #include "unicode/parseerr.h"
00031 #include "unicode/schriter.h"
00032
00033 U_NAMESPACE_BEGIN
00034
/*
 * Forward declarations. Within this header these types appear only as
 * pointers (data members, a constructor parameter, or a return type), so
 * incomplete types are sufficient and their defining headers are not
 * included here.
 */
00036 class LanguageBreakEngine;
00037 struct RBBIDataHeader;
00038 class RBBIDataWrapper;
00039 class UnhandledEngine;
00040 class UStack;
00041
/**
 * A BreakIterator subclass exported with U_COMMON_API.
 *
 * Instances can be constructed from rule source text (a UnicodeString),
 * from a block of pre-compiled rule bytes, or from a UDataMemory image;
 * see the public constructors below. The class declares the usual
 * navigation calls (first/last/next/previous/following/preceding),
 * text setters/getters, and rule-status queries.
 *
 * NOTE(review): the base class BreakIterator is declared in
 * "unicode/brkiter.h" and is not visible here; which of the virtual
 * functions below override base-class virtuals should be confirmed there.
 */
00053 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00054
00055 private:
/** UText held by value. Presumably the text being iterated over (see setText()/getUText()) -- TODO confirm in the implementation. */
00060 UText fText;
00061
/*
 * fData is public only when U_HIDE_INTERNAL_API is NOT defined; when the
 * macro is defined the `public:` label is compiled out and the preceding
 * `private:` section stays in effect for fData.
 */
00062 #ifndef U_HIDE_INTERNAL_API
00063 public:
00064 #endif
00065
/** Pointer to an RBBIDataWrapper (forward-declared above). Presumably wraps the compiled rule data -- TODO confirm. */
00070 RBBIDataWrapper *fData;
00071 private:
00072
/** Presumably the iterator's current position, i.e. what current() reports -- TODO confirm. */
00077 int32_t fPosition;
00078
/** Presumably an index locating the status value(s) reported by getRuleStatus()/getRuleStatusVec() -- TODO confirm. */
00082 int32_t fRuleStatusIndex;
00083
/** Nested class, declared here and defined elsewhere; held through a pointer. */
00087 class BreakCache;
00088 BreakCache *fBreakCache;
00089
/** Nested class, declared here and defined elsewhere; held through a pointer. */
00094 class DictionaryCache;
00095 DictionaryCache *fDictionaryCache;
00096
/** Pointer to a UStack; judging by the name it holds language break engines (see getLanguageBreakEngine()) -- TODO confirm element type and ownership. */
00104 UStack *fLanguageBreakEngines;
00105
/** Pointer to an UnhandledEngine (forward-declared above); role not visible in this header. */
00113 UnhandledEngine *fUnhandledBreakEngine;
00114
/** Unsigned counter; by its name, a count of dictionary characters -- TODO confirm what increments it. */
00120 uint32_t fDictionaryCharCount;
00121
/** CharacterIterator pointer. Presumably backs getText()/adoptText(); ownership is not visible here -- TODO confirm. */
00127 CharacterIterator *fCharIter;
00128
/** StringCharacterIterator held by value (type from "unicode/schriter.h"). */
00134 StringCharacterIterator fSCharIter;
00135
/** Boolean flag; by its name, set once iteration has finished -- TODO confirm exact meaning. */
00139 UBool fDone;
00140
00141
00142
00143
00144
/**
 * Private constructor taking a raw RBBIDataHeader pointer and an error
 * code. Being private, it is reachable only from this class and from the
 * friends declared just below (RBBIRuleBuilder and BreakIterator).
 * NOTE(review): whether `data` is adopted or copied is not visible here.
 */
00155 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00156
/** Friends: granted access to the private members, including the constructor above. */
00158 friend class RBBIRuleBuilder;
00160 friend class BreakIterator;
00161
00162 public:
00163
/** Default constructor. */
00168 RuleBasedBreakIterator();
00169
/** Copy constructor. */
00176 RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00177
/**
 * Constructs from rule source text.
 * @param rules      the rule description, as a UnicodeString
 * @param parseError non-const reference; presumably filled in with parse-error details on failure -- TODO confirm
 * @param status     ICU error code, passed by non-const reference
 */
00186 RuleBasedBreakIterator( const UnicodeString &rules,
00187 UParseError &parseError,
00188 UErrorCode &status);
00189
/**
 * Constructs from a block of pre-compiled rule bytes.
 * @param compiledRules pointer to the bytes (const; lifetime requirements not visible here)
 * @param ruleLength    presumably the size of compiledRules in bytes -- TODO confirm
 * @param status        ICU error code, passed by non-const reference
 */
00213 RuleBasedBreakIterator(const uint8_t *compiledRules,
00214 uint32_t ruleLength,
00215 UErrorCode &status);
00216
/**
 * Constructs from a UDataMemory image.
 * NOTE(review): ownership of `image` after the call is not visible here -- confirm.
 */
00229 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00230
/** Virtual destructor. */
00235 virtual ~RuleBasedBreakIterator();
00236
/** Copy assignment; returns a reference to a RuleBasedBreakIterator. */
00244 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00245
/** Virtual equality comparison against any BreakIterator. */
00254 virtual UBool operator==(const BreakIterator& that) const;
00255
/** Non-virtual inequality; defined inline after the class as the negation of operator==(). */
00263 UBool operator!=(const BreakIterator& that) const;
00264
/** Virtual clone; returns a BreakIterator pointer. Presumably the caller owns the result -- TODO confirm. */
00275 virtual BreakIterator* clone() const;
00276
/** Virtual hash code. */
00282 virtual int32_t hashCode(void) const;
00283
/** Returns a const reference to a UnicodeString; by its name, the rule source text. */
00289 virtual const UnicodeString& getRules(void) const;
00290
00291
00292
00293
00294
/** Returns a (non-const) CharacterIterator reference from a const member function. */
00320 virtual CharacterIterator& getText(void) const;
00321
00322
/**
 * Returns a UText pointer.
 * @param fillIn UText pointer supplied by the caller; presumably reused for the result when non-null -- TODO confirm
 * @param status ICU error code, passed by non-const reference
 */
00337 virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00338
/** Takes a CharacterIterator pointer; the "adopt" name suggests ownership transfer -- TODO confirm. */
00346 virtual void adoptText(CharacterIterator* newText);
00347
/** Sets the text from a UnicodeString passed by const reference. */
00359 virtual void setText(const UnicodeString& newText);
00360
/** Sets the text from a UText, reporting failures through `status`. */
00374 virtual void setText(UText *text, UErrorCode &status);
00375
/*
 * Navigation interface. Each call returns an int32_t, presumably a text
 * offset of a boundary (or a sentinel when none exists) -- TODO confirm
 * the exact contract against the BreakIterator documentation.
 */
00381 virtual int32_t first(void);
00382
00388 virtual int32_t last(void);
00389
/** Overload taking a count `n`; presumably moves n boundaries (direction by sign) -- TODO confirm. */
00400 virtual int32_t next(int32_t n);
00401
00407 virtual int32_t next(void);
00408
00414 virtual int32_t previous(void);
00415
/** Takes an offset; by its name, locates the boundary following it. */
00423 virtual int32_t following(int32_t offset);
00424
/** Takes an offset; by its name, locates the boundary preceding it. */
00432 virtual int32_t preceding(int32_t offset);
00433
/** Non-const: by its signature this query may modify iterator state. */
00442 virtual UBool isBoundary(int32_t offset);
00443
/** Const accessor returning an int32_t; presumably the current boundary position. */
00452 virtual int32_t current(void) const;
00453
00454
/** Const accessor returning a single int32_t status value. */
00486 virtual int32_t getRuleStatus() const;
00487
/**
 * Vector form of the status query.
 * @param fillInVec caller-supplied output array
 * @param capacity  presumably the number of int32_t slots available in fillInVec -- TODO confirm
 * @param status    ICU error code, passed by non-const reference
 * @return an int32_t; presumably the number of status values -- TODO confirm
 */
00511 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00512
/** Per-object class ID (virtual). */
00524 virtual UClassID getDynamicClassID(void) const;
00525
/** Class-wide class ID (static, U_EXPORT2 calling convention). */
00537 static UClassID U_EXPORT2 getStaticClassID(void);
00538
/**
 * Clone variant taking a caller-provided buffer.
 * @param stackBuffer untyped buffer pointer
 * @param BufferSize  passed by non-const reference, so it may be updated by the call -- TODO confirm semantics
 * @param status      ICU error code, passed by non-const reference
 */
00565 virtual BreakIterator * createBufferClone(void *stackBuffer,
00566 int32_t &BufferSize,
00567 UErrorCode &status);
00568
00569
/**
 * Returns a pointer to const bytes; by its name, the compiled rules.
 * @param length out-parameter (non-const reference); presumably receives the byte length -- TODO confirm
 */
00587 virtual const uint8_t *getBinaryRules(uint32_t &length);
00588
/** Takes a UText and an error code; returns a RuleBasedBreakIterator reference (presumably *this -- TODO confirm). */
00614 virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
00615
00616
00617 private:
00618
00619
00620
/* Private helpers; behaviour is defined in the implementation file, not visible here. */
00626 void reset(void);
00627
00632 void init(UErrorCode &status);
00633
/** Takes a starting position and returns an int32_t position. */
00643 int32_t handleSafePrevious(int32_t fromPosition);
00644
00657 int32_t handleNext();
00658
00659
/** Returns a pointer to a const LanguageBreakEngine for code point `c`; may presumably be null when none applies -- TODO confirm. */
00666 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00667
/*
 * The `public:` label is unconditional, but the two dump functions are
 * declared only when U_HIDE_INTERNAL_API is not defined.
 */
00668 public:
00669 #ifndef U_HIDE_INTERNAL_API
00670
/** Internal-API diagnostic; by its name, dumps the break cache. */
00674 void dumpCache();
00675
/** Internal-API diagnostic; by its name, dumps the rule tables. */
00680 void dumpTables();
00681
00682 #endif
00683 };
00684
00685
00686
00687
00688
00689
00690
/**
 * Inequality test, implemented as the logical negation of this object's
 * operator==(). The member operator is invoked explicitly on `this`, so
 * the comparison goes through the same (virtual) equality function.
 *
 * @param that the BreakIterator to compare against
 * @return the negation of operator==(that)
 */
00691 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00692 return !(this->operator==(that));
00693 }
00694
00695 U_NAMESPACE_END
00696
00697 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
00698
00699 #endif /* RBBI_H */