00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef __UCHARSTRIE_H__
00018 #define __UCHARSTRIE_H__
00019
00026 #include "unicode/utypes.h"
00027 #include "unicode/unistr.h"
00028 #include "unicode/uobject.h"
00029 #include "unicode/ustringtrie.h"
00030
00031 U_NAMESPACE_BEGIN
00032
00033 class Appendable;
00034 class UCharsTrieBuilder;
00035 class UVector32;
00036
00050 class U_COMMON_API UCharsTrie : public UMemory {
00051 public:
00066 UCharsTrie(ConstChar16Ptr trieUChars)
00067 : ownedArray_(NULL), uchars_(trieUChars),
00068 pos_(uchars_), remainingMatchLength_(-1) {}
00069
00074 ~UCharsTrie();
00075
00082 UCharsTrie(const UCharsTrie &other)
00083 : ownedArray_(NULL), uchars_(other.uchars_),
00084 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
00085
00091 UCharsTrie &reset() {
00092 pos_=uchars_;
00093 remainingMatchLength_=-1;
00094 return *this;
00095 }
00096
00102 class State : public UMemory {
00103 public:
00108 State() { uchars=NULL; }
00109 private:
00110 friend class UCharsTrie;
00111
00112 const char16_t *uchars;
00113 const char16_t *pos;
00114 int32_t remainingMatchLength;
00115 };
00116
00124 const UCharsTrie &saveState(State &state) const {
00125 state.uchars=uchars_;
00126 state.pos=pos_;
00127 state.remainingMatchLength=remainingMatchLength_;
00128 return *this;
00129 }
00130
00141 UCharsTrie &resetToState(const State &state) {
00142 if(uchars_==state.uchars && uchars_!=NULL) {
00143 pos_=state.pos;
00144 remainingMatchLength_=state.remainingMatchLength;
00145 }
00146 return *this;
00147 }
00148
00155 UStringTrieResult current() const;
00156
00164 inline UStringTrieResult first(int32_t uchar) {
00165 remainingMatchLength_=-1;
00166 return nextImpl(uchars_, uchar);
00167 }
00168
00177 UStringTrieResult firstForCodePoint(UChar32 cp);
00178
00185 UStringTrieResult next(int32_t uchar);
00186
00194 UStringTrieResult nextForCodePoint(UChar32 cp);
00195
00211 UStringTrieResult next(ConstChar16Ptr s, int32_t length);
00212
00222 inline int32_t getValue() const {
00223 const char16_t *pos=pos_;
00224 int32_t leadUnit=*pos++;
00225
00226 return leadUnit&kValueIsFinal ?
00227 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
00228 }
00229
00239 inline UBool hasUniqueValue(int32_t &uniqueValue) const {
00240 const char16_t *pos=pos_;
00241
00242 return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
00243 }
00244
00252 int32_t getNextUChars(Appendable &out) const;
00253
00258 class U_COMMON_API Iterator : public UMemory {
00259 public:
00271 Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
00272
00284 Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
00285
00290 ~Iterator();
00291
00297 Iterator &reset();
00298
00303 UBool hasNext() const;
00304
00319 UBool next(UErrorCode &errorCode);
00320
00325 const UnicodeString &getString() const { return str_; }
00330 int32_t getValue() const { return value_; }
00331
00332 private:
00333 UBool truncateAndStop() {
00334 pos_=NULL;
00335 value_=-1;
00336 return TRUE;
00337 }
00338
00339 const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode);
00340
00341 const char16_t *uchars_;
00342 const char16_t *pos_;
00343 const char16_t *initialPos_;
00344 int32_t remainingMatchLength_;
00345 int32_t initialRemainingMatchLength_;
00346 UBool skipValue_;
00347
00348 UnicodeString str_;
00349 int32_t maxLength_;
00350 int32_t value_;
00351
00352
00353
00354
00355
00356
00357
00358
00359 UVector32 *stack_;
00360 };
00361
00362 private:
00363 friend class UCharsTrieBuilder;
00364
00371 UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars)
00372 : ownedArray_(adoptUChars), uchars_(trieUChars),
00373 pos_(uchars_), remainingMatchLength_(-1) {}
00374
00375
00376 UCharsTrie &operator=(const UCharsTrie &other);
00377
00378 inline void stop() {
00379 pos_=NULL;
00380 }
00381
00382
00383
00384 static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) {
00385 int32_t value;
00386 if(leadUnit<kMinTwoUnitValueLead) {
00387 value=leadUnit;
00388 } else if(leadUnit<kThreeUnitValueLead) {
00389 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
00390 } else {
00391 value=(pos[0]<<16)|pos[1];
00392 }
00393 return value;
00394 }
00395 static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) {
00396 if(leadUnit>=kMinTwoUnitValueLead) {
00397 if(leadUnit<kThreeUnitValueLead) {
00398 ++pos;
00399 } else {
00400 pos+=2;
00401 }
00402 }
00403 return pos;
00404 }
00405 static inline const char16_t *skipValue(const char16_t *pos) {
00406 int32_t leadUnit=*pos++;
00407 return skipValue(pos, leadUnit&0x7fff);
00408 }
00409
00410 static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) {
00411
00412 int32_t value;
00413 if(leadUnit<kMinTwoUnitNodeValueLead) {
00414 value=(leadUnit>>6)-1;
00415 } else if(leadUnit<kThreeUnitNodeValueLead) {
00416 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
00417 } else {
00418 value=(pos[0]<<16)|pos[1];
00419 }
00420 return value;
00421 }
00422 static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) {
00423
00424 if(leadUnit>=kMinTwoUnitNodeValueLead) {
00425 if(leadUnit<kThreeUnitNodeValueLead) {
00426 ++pos;
00427 } else {
00428 pos+=2;
00429 }
00430 }
00431 return pos;
00432 }
00433
00434 static inline const char16_t *jumpByDelta(const char16_t *pos) {
00435 int32_t delta=*pos++;
00436 if(delta>=kMinTwoUnitDeltaLead) {
00437 if(delta==kThreeUnitDeltaLead) {
00438 delta=(pos[0]<<16)|pos[1];
00439 pos+=2;
00440 } else {
00441 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
00442 }
00443 }
00444 return pos+delta;
00445 }
00446
00447 static const char16_t *skipDelta(const char16_t *pos) {
00448 int32_t delta=*pos++;
00449 if(delta>=kMinTwoUnitDeltaLead) {
00450 if(delta==kThreeUnitDeltaLead) {
00451 pos+=2;
00452 } else {
00453 ++pos;
00454 }
00455 }
00456 return pos;
00457 }
00458
00459 static inline UStringTrieResult valueResult(int32_t node) {
00460 return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
00461 }
00462
00463
00464 UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar);
00465
00466
00467 UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar);
00468
00469
00470
00471
00472 static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length,
00473 UBool haveUniqueValue, int32_t &uniqueValue);
00474
00475
00476 static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
00477
00478
00479
00480 static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out);
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525 static const int32_t kMaxBranchLinearSubNodeLength=5;
00526
00527
00528 static const int32_t kMinLinearMatch=0x30;
00529 static const int32_t kMaxLinearMatchLength=0x10;
00530
00531
00532
00533
00534 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
00535 static const int32_t kNodeTypeMask=kMinValueLead-1;
00536
00537
00538 static const int32_t kValueIsFinal=0x8000;
00539
00540
00541 static const int32_t kMaxOneUnitValue=0x3fff;
00542
00543 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
00544 static const int32_t kThreeUnitValueLead=0x7fff;
00545
00546 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
00547
00548
00549 static const int32_t kMaxOneUnitNodeValue=0xff;
00550 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
00551 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
00552
00553 static const int32_t kMaxTwoUnitNodeValue=
00554 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
00555
00556
00557 static const int32_t kMaxOneUnitDelta=0xfbff;
00558 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
00559 static const int32_t kThreeUnitDeltaLead=0xffff;
00560
00561 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
00562
00563 char16_t *ownedArray_;
00564
00565
00566 const char16_t *uchars_;
00567
00568
00569
00570
00571 const char16_t *pos_;
00572
00573 int32_t remainingMatchLength_;
00574 };
00575
00576 U_NAMESPACE_END
00577
00578 #endif // __UCHARSTRIE_H__