00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef RBBI_H
00017 #define RBBI_H
00018
00019 #include "unicode/utypes.h"
00020
00026 #if !UCONFIG_NO_BREAK_ITERATION
00027
00028 #include "unicode/brkiter.h"
00029 #include "unicode/udata.h"
00030 #include "unicode/parseerr.h"
00031 #include "unicode/schriter.h"
00032 #include "unicode/uchriter.h"
00033
00034 U_NAMESPACE_BEGIN
00035
// Forward declarations. Within this header these types are only used through
// pointers (data members, a constructor parameter, and a return type), so
// their full definitions are not needed here.
00037 class LanguageBreakEngine;
00038 struct RBBIDataHeader;
00039 class RBBIDataWrapper;
00040 class UnhandledEngine;
00041 class UStack;
00042
/**
 * A BreakIterator subclass exported from the common library (U_COMMON_API).
 *
 * Instances can be constructed from a rule string, from a block of compiled
 * rule bytes, or from a UDataMemory image (see the public constructors).
 * This header only declares the interface; all member functions except
 * operator!= are defined elsewhere.
 *
 * NOTE(review): ownership, lifetime and thread-safety of the pointer members
 * are not visible from this header -- confirm against the implementation
 * before relying on any of the hedged notes below.
 */
00054 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00055
00056 private:
    /** Input text as a UText. Presumably the text being iterated over -- confirm. */
00061 UText *fText;
00062
    /**
     * Input text as a CharacterIterator. Presumably backs getText() and
     * adoptText() -- confirm in the implementation.
     */
00068 CharacterIterator *fCharIter;
00069
    /**
     * StringCharacterIterator helper.
     * NOTE(review): looks like a wrapper used when text is supplied in another
     * form -- confirm when it is created and who frees it.
     */
00075 StringCharacterIterator *fSCharIter;
00076
    /**
     * UCharCharacterIterator helper.
     * NOTE(review): purpose and ownership are not visible here -- confirm.
     */
00082 UCharCharacterIterator *fDCharIter;
00083
    /** Wrapper around the break rule data (RBBIDataWrapper is only forward-declared here). */
00088 RBBIDataWrapper *fData;
00089
    /** Presumably the current iteration position as a text offset -- confirm units. */
00101 int32_t fPosition;
00102
    /**
     * Index associated with rule status values.
     * NOTE(review): presumably indexes a status table in the rule data and
     * feeds getRuleStatus()/getRuleStatusVec() -- confirm.
     */
00106 int32_t fRuleStatusIndex;
00107
    /** Boolean flag; presumably set once iteration has run off the text -- confirm. */
00111 UBool fDone;
00112
    // BreakCache and its pointer are deliberately declared in a public section
    // sandwiched between private sections; the nested class is only
    // forward-declared in this header.
00116 public:
00117 class BreakCache;
00118 BreakCache *fBreakCache;
00119 private:
    /**
     * Unsigned counter related to dictionary characters.
     * NOTE(review): exactly what is counted, and when it is reset, is not
     * visible here -- confirm.
     */
00125 uint32_t fDictionaryCharCount;
00126
    /** Nested cache type (forward-declared only) and the instance pointer for it. */
00131 class DictionaryCache;
00132 DictionaryCache *fDictionaryCache;
00133
    /**
     * Stack of language break engines.
     * NOTE(review): presumably populated lazily via getLanguageBreakEngine() -- confirm.
     */
00141 UStack *fLanguageBreakEngines;
00142
    /**
     * Engine object of type UnhandledEngine.
     * NOTE(review): presumably the fallback for characters no other engine
     * handles -- confirm.
     */
00150 UnhandledEngine *fUnhandledBreakEngine;
00151
    /** Integer break-type code; assigned through setBreakType(). Value meanings are not visible here. */
00157 int32_t fBreakType;
00158
00159
00160
00161
00162
    /**
     * Private constructor taking a raw RBBIDataHeader pointer.
     * Only reachable by this class and the friends declared below.
     *
     * @param data   Rule data header. NOTE(review): ownership transfer not visible here -- confirm.
     * @param status Error code in/out parameter.
     */
00173 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00174
    // Friends: these classes may access private members, including the
    // private constructor above.
00176 friend class RBBIRuleBuilder;
00178 friend class BreakIterator;
00179
00180 public:
00181
    /** Default constructor. */
00186 RuleBasedBreakIterator();
00187
    /**
     * Copy constructor.
     * @param that The iterator to copy from.
     */
00194 RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00195
    /**
     * Constructs an iterator from a rule description string.
     *
     * @param rules      The rule source text.
     * @param parseError Parse error information; presumably filled in when the
     *                   rules fail to parse -- confirm.
     * @param status     Error code in/out parameter.
     */
00204 RuleBasedBreakIterator( const UnicodeString &rules,
00205 UParseError &parseError,
00206 UErrorCode &status);
00207
    /**
     * Constructs an iterator from a block of compiled rule bytes.
     *
     * @param compiledRules Pointer to the compiled rule data.
     *                      NOTE(review): whether the data is copied or must
     *                      outlive this object is not visible here -- confirm.
     * @param ruleLength    Length of compiledRules; presumably in bytes -- confirm.
     * @param status        Error code in/out parameter.
     */
00231 RuleBasedBreakIterator(const uint8_t *compiledRules,
00232 uint32_t ruleLength,
00233 UErrorCode &status);
00234
    /**
     * Constructs an iterator from a UDataMemory image.
     *
     * @param image  The data image. NOTE(review): ownership not visible here -- confirm.
     * @param status Error code in/out parameter.
     */
00247 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00248
    /** Virtual destructor. */
00253 virtual ~RuleBasedBreakIterator();
00254
    /**
     * Copy assignment.
     * @param that The iterator to assign from.
     * @return A reference to a RuleBasedBreakIterator (presumably *this).
     */
00262 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00263
    /**
     * Equality comparison against any BreakIterator.
     * @param that The iterator to compare with.
     * @return A UBool result; the exact equality criteria are defined by the implementation.
     */
00272 virtual UBool operator==(const BreakIterator& that) const;
00273
    /**
     * Inequality comparison. Non-virtual; defined inline after the class as
     * the negation of operator==().
     * @param that The iterator to compare with.
     */
00281 UBool operator!=(const BreakIterator& that) const;
00282
    /**
     * Polymorphic copy.
     * @return A pointer to a BreakIterator. NOTE(review): presumably a newly
     *         allocated copy owned by the caller -- confirm.
     */
00293 virtual BreakIterator* clone() const;
00294
    /** @return A hash code for this object. */
00300 virtual int32_t hashCode(void) const;
00301
    /** @return A reference to a UnicodeString; presumably the rule source text -- confirm. */
00307 virtual const UnicodeString& getRules(void) const;
00308
00309
00310
00311
00312
    /** @return A reference to a CharacterIterator; presumably over the current input text -- confirm. */
00338 virtual CharacterIterator& getText(void) const;
00339
00340
    /**
     * UText-based text accessor.
     *
     * @param fillIn A UText that may be supplied by the caller.
     *               NOTE(review): presumably reused for the result, with
     *               NULL meaning "allocate" -- confirm.
     * @param status Error code in/out parameter.
     * @return A UText pointer.
     */
00355 virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00356
    /**
     * Sets the input text from a CharacterIterator.
     * @param newText The new text; the name "adopt" suggests ownership passes
     *                to this object -- confirm.
     */
00364 virtual void adoptText(CharacterIterator* newText);
00365
    /**
     * Sets the input text from a UnicodeString.
     * @param newText The new text. NOTE(review): whether the string is copied
     *                or merely referenced is not visible here -- confirm.
     */
00377 virtual void setText(const UnicodeString& newText);
00378
    /**
     * Sets the input text from a UText.
     * @param text   The new text.
     * @param status Error code in/out parameter.
     */
00392 virtual void setText(UText *text, UErrorCode &status);
00393
    // Iteration interface. All of these return int32_t. NOTE(review): the
    // values are presumably text offsets of boundaries, with a sentinel when
    // iteration is exhausted -- confirm against the BreakIterator contract.

    /** Presumably moves to the first boundary and returns its offset -- confirm. */
00399 virtual int32_t first(void);
00400
    /** Presumably moves to the last boundary and returns its offset -- confirm. */
00406 virtual int32_t last(void);
00407
    /**
     * @param n A signed count; presumably the number of boundaries to advance
     *          (negative meaning backwards) -- confirm.
     */
00418 virtual int32_t next(int32_t n);
00419
    /** Presumably advances to the next boundary -- confirm. */
00425 virtual int32_t next(void);
00426
    /** Presumably moves back to the previous boundary -- confirm. */
00432 virtual int32_t previous(void);
00433
    /** @param offset A text offset; presumably the search starts after it -- confirm. */
00441 virtual int32_t following(int32_t offset);
00442
    /** @param offset A text offset; presumably the search starts before it -- confirm. */
00450 virtual int32_t preceding(int32_t offset);
00451
    /**
     * @param offset A text offset to test.
     * @return A UBool; presumably true when offset is a boundary -- confirm.
     *         Note this is non-const, so it may modify iterator state.
     */
00460 virtual UBool isBoundary(int32_t offset);
00461
    /** @return The current position; const, so it does not move the iterator. */
00470 virtual int32_t current(void) const;
00471
00472
    /**
     * @return An int32_t status value. NOTE(review): presumably the rule
     *         status of the most recently returned boundary -- confirm.
     */
00506 virtual int32_t getRuleStatus() const;
00507
    /**
     * Vector form of the rule status query.
     *
     * @param fillInVec Caller-supplied output array.
     * @param capacity  Capacity of fillInVec; presumably in elements -- confirm.
     * @param status    Error code in/out parameter.
     * @return An int32_t; presumably the number of status values available -- confirm.
     */
00531 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00532
    /** @return The class ID for the dynamic type of this object. */
00544 virtual UClassID getDynamicClassID(void) const;
00545
    /** @return The class ID for this class (static; no instance required). */
00557 static UClassID U_EXPORT2 getStaticClassID(void);
00558
    /**
     * Clone variant that takes a caller-supplied buffer.
     *
     * @param stackBuffer Caller-supplied memory. NOTE(review): whether it is
     *                    actually used for the clone is not visible here -- confirm.
     * @param BufferSize  Buffer size, passed by reference so it can be updated.
     * @param status      Error code in/out parameter.
     * @return A pointer to a BreakIterator.
     */
00585 virtual BreakIterator * createBufferClone(void *stackBuffer,
00586 int32_t &BufferSize,
00587 UErrorCode &status);
00588
00589
    /**
     * Accessor for the compiled rule bytes.
     *
     * @param length Output: receives the length of the returned data.
     * @return Pointer to the rule bytes. NOTE(review): presumably owned by
     *         this object and valid only for its lifetime -- confirm.
     */
00607 virtual const uint8_t *getBinaryRules(uint32_t &length);
00608
    /**
     * Re-points the iterator at a new UText.
     *
     * @param input  The replacement UText. NOTE(review): the required
     *               relationship to the previous text is not visible here -- confirm.
     * @param status Error code in/out parameter.
     * @return A reference to a RuleBasedBreakIterator (presumably *this).
     */
00634 virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
00635
00636
00637 private:
00638
00639
00640
    /** Internal reset helper; the exact state it clears is defined in the implementation. */
00646 void reset(void);
00647
    /** Internal setter taking an integer break-type code (see fBreakType). */
00652 void setBreakType(int32_t type);
00653
    /**
     * Internal initialization helper. NOTE(review): presumably shared by the
     * constructors -- confirm.
     * @param status Error code in/out parameter.
     */
00658 void init(UErrorCode &status);
00659
    /**
     * Internal backwards-iteration helper.
     * @param fromPosition Starting position for the backwards step.
     * @return An int32_t; presumably the boundary found -- confirm.
     */
00668 int32_t handlePrevious(int32_t fromPosition);
00669
    /**
     * Internal forwards-iteration helper; takes no arguments, so it
     * necessarily works from the object's own state.
     * @return An int32_t; presumably the boundary found -- confirm.
     */
00682 int32_t handleNext();
00683
00684
    /**
     * Looks up a language break engine for a code point.
     * @param c The code point.
     * @return A pointer to a const LanguageBreakEngine. NOTE(review): whether
     *         NULL is a possible result is not visible here -- confirm.
     */
00691 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00692
00693 public:
    // Internal API: hidden from clients that define U_HIDE_INTERNAL_API.
00694 #ifndef U_HIDE_INTERNAL_API
00695
    /** Debugging aid; presumably dumps the contents of the break cache -- confirm. */
00699 void dumpCache();
00700 #endif
00701 };
00702
00703
00704
00705
00706
00707
00708
/**
 * Inequality operator, defined as the logical negation of operator==()
 * applied to the same argument.
 *
 * @param that The break iterator to compare against.
 * @return The negated result of operator==(that).
 */
00709 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00710 return !(this->operator==(that));
00711 }
00712
00713 U_NAMESPACE_END
00714
00715 #endif
00716
00717 #endif