00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef __UCHARSTRIE_H__
00018 #define __UCHARSTRIE_H__
00019
00026 #include "unicode/utypes.h"
00027
00028 #if U_SHOW_CPLUSPLUS_API
00029
00030 #include "unicode/unistr.h"
00031 #include "unicode/uobject.h"
00032 #include "unicode/ustringtrie.h"
00033
00034 U_NAMESPACE_BEGIN
00035
00036 class Appendable;
00037 class UCharsTrieBuilder;
00038 class UVector32;
00039
00053 class U_COMMON_API UCharsTrie : public UMemory {
00054 public:
00069 UCharsTrie(ConstChar16Ptr trieUChars)
00070 : ownedArray_(NULL), uchars_(trieUChars),
00071 pos_(uchars_), remainingMatchLength_(-1) {}
00072
00077 ~UCharsTrie();
00078
00085 UCharsTrie(const UCharsTrie &other)
00086 : ownedArray_(NULL), uchars_(other.uchars_),
00087 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
00088
00094 UCharsTrie &reset() {
00095 pos_=uchars_;
00096 remainingMatchLength_=-1;
00097 return *this;
00098 }
00099
00100 #ifndef U_HIDE_DRAFT_API
00101
00109 uint64_t getState64() const {
00110 return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
00111 (uint64_t)(pos_ - uchars_);
00112 }
00113
00128 UCharsTrie &resetToState64(uint64_t state) {
00129 remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
00130 pos_ = uchars_ + (state & kState64PosMask);
00131 return *this;
00132 }
00133 #endif
00134
00140 class State : public UMemory {
00141 public:
00146 State() { uchars=NULL; }
00147 private:
00148 friend class UCharsTrie;
00149
00150 const char16_t *uchars;
00151 const char16_t *pos;
00152 int32_t remainingMatchLength;
00153 };
00154
00162 const UCharsTrie &saveState(State &state) const {
00163 state.uchars=uchars_;
00164 state.pos=pos_;
00165 state.remainingMatchLength=remainingMatchLength_;
00166 return *this;
00167 }
00168
00179 UCharsTrie &resetToState(const State &state) {
00180 if(uchars_==state.uchars && uchars_!=NULL) {
00181 pos_=state.pos;
00182 remainingMatchLength_=state.remainingMatchLength;
00183 }
00184 return *this;
00185 }
00186
00193 UStringTrieResult current() const;
00194
00202 inline UStringTrieResult first(int32_t uchar) {
00203 remainingMatchLength_=-1;
00204 return nextImpl(uchars_, uchar);
00205 }
00206
00215 UStringTrieResult firstForCodePoint(UChar32 cp);
00216
00223 UStringTrieResult next(int32_t uchar);
00224
00232 UStringTrieResult nextForCodePoint(UChar32 cp);
00233
00249 UStringTrieResult next(ConstChar16Ptr s, int32_t length);
00250
00260 inline int32_t getValue() const {
00261 const char16_t *pos=pos_;
00262 int32_t leadUnit=*pos++;
00263
00264 return leadUnit&kValueIsFinal ?
00265 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
00266 }
00267
00277 inline UBool hasUniqueValue(int32_t &uniqueValue) const {
00278 const char16_t *pos=pos_;
00279
00280 return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
00281 }
00282
00290 int32_t getNextUChars(Appendable &out) const;
00291
00296 class U_COMMON_API Iterator : public UMemory {
00297 public:
00309 Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
00310
00322 Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
00323
00328 ~Iterator();
00329
00335 Iterator &reset();
00336
00341 UBool hasNext() const;
00342
00357 UBool next(UErrorCode &errorCode);
00358
00363 const UnicodeString &getString() const { return str_; }
00368 int32_t getValue() const { return value_; }
00369
00370 private:
00371 UBool truncateAndStop() {
00372 pos_=NULL;
00373 value_=-1;
00374 return TRUE;
00375 }
00376
00377 const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode);
00378
00379 const char16_t *uchars_;
00380 const char16_t *pos_;
00381 const char16_t *initialPos_;
00382 int32_t remainingMatchLength_;
00383 int32_t initialRemainingMatchLength_;
00384 UBool skipValue_;
00385
00386 UnicodeString str_;
00387 int32_t maxLength_;
00388 int32_t value_;
00389
00390
00391
00392
00393
00394
00395
00396
00397 UVector32 *stack_;
00398 };
00399
00400 private:
00401 friend class UCharsTrieBuilder;
00402
00409 UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars)
00410 : ownedArray_(adoptUChars), uchars_(trieUChars),
00411 pos_(uchars_), remainingMatchLength_(-1) {}
00412
00413
00414 UCharsTrie &operator=(const UCharsTrie &other);
00415
00416 inline void stop() {
00417 pos_=NULL;
00418 }
00419
00420
00421
00422 static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) {
00423 int32_t value;
00424 if(leadUnit<kMinTwoUnitValueLead) {
00425 value=leadUnit;
00426 } else if(leadUnit<kThreeUnitValueLead) {
00427 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
00428 } else {
00429 value=(pos[0]<<16)|pos[1];
00430 }
00431 return value;
00432 }
00433 static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) {
00434 if(leadUnit>=kMinTwoUnitValueLead) {
00435 if(leadUnit<kThreeUnitValueLead) {
00436 ++pos;
00437 } else {
00438 pos+=2;
00439 }
00440 }
00441 return pos;
00442 }
00443 static inline const char16_t *skipValue(const char16_t *pos) {
00444 int32_t leadUnit=*pos++;
00445 return skipValue(pos, leadUnit&0x7fff);
00446 }
00447
00448 static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) {
00449
00450 int32_t value;
00451 if(leadUnit<kMinTwoUnitNodeValueLead) {
00452 value=(leadUnit>>6)-1;
00453 } else if(leadUnit<kThreeUnitNodeValueLead) {
00454 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
00455 } else {
00456 value=(pos[0]<<16)|pos[1];
00457 }
00458 return value;
00459 }
00460 static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) {
00461
00462 if(leadUnit>=kMinTwoUnitNodeValueLead) {
00463 if(leadUnit<kThreeUnitNodeValueLead) {
00464 ++pos;
00465 } else {
00466 pos+=2;
00467 }
00468 }
00469 return pos;
00470 }
00471
00472 static inline const char16_t *jumpByDelta(const char16_t *pos) {
00473 int32_t delta=*pos++;
00474 if(delta>=kMinTwoUnitDeltaLead) {
00475 if(delta==kThreeUnitDeltaLead) {
00476 delta=(pos[0]<<16)|pos[1];
00477 pos+=2;
00478 } else {
00479 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
00480 }
00481 }
00482 return pos+delta;
00483 }
00484
00485 static const char16_t *skipDelta(const char16_t *pos) {
00486 int32_t delta=*pos++;
00487 if(delta>=kMinTwoUnitDeltaLead) {
00488 if(delta==kThreeUnitDeltaLead) {
00489 pos+=2;
00490 } else {
00491 ++pos;
00492 }
00493 }
00494 return pos;
00495 }
00496
00497 static inline UStringTrieResult valueResult(int32_t node) {
00498 return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
00499 }
00500
00501
00502 UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar);
00503
00504
00505 UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar);
00506
00507
00508
00509
00510 static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length,
00511 UBool haveUniqueValue, int32_t &uniqueValue);
00512
00513
00514 static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
00515
00516
00517
00518 static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out);
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563 static const int32_t kMaxBranchLinearSubNodeLength=5;
00564
00565
00566 static const int32_t kMinLinearMatch=0x30;
00567 static const int32_t kMaxLinearMatchLength=0x10;
00568
00569
00570
00571
00572 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
00573 static const int32_t kNodeTypeMask=kMinValueLead-1;
00574
00575
00576 static const int32_t kValueIsFinal=0x8000;
00577
00578
00579 static const int32_t kMaxOneUnitValue=0x3fff;
00580
00581 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
00582 static const int32_t kThreeUnitValueLead=0x7fff;
00583
00584 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
00585
00586
00587 static const int32_t kMaxOneUnitNodeValue=0xff;
00588 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
00589 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
00590
00591 static const int32_t kMaxTwoUnitNodeValue=
00592 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
00593
00594
00595 static const int32_t kMaxOneUnitDelta=0xfbff;
00596 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
00597 static const int32_t kThreeUnitDeltaLead=0xffff;
00598
00599 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
00600
00601
00602
00603
00604
00605 static constexpr int32_t kState64RemainingShift = 59;
00606 static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
00607
00608 char16_t *ownedArray_;
00609
00610
00611 const char16_t *uchars_;
00612
00613
00614
00615
00616 const char16_t *pos_;
00617
00618 int32_t remainingMatchLength_;
00619 };
00620
00621 U_NAMESPACE_END
00622
00623 #endif
00624
00625 #endif // __UCHARSTRIE_H__