00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef __STRINGTRIEBUILDER_H__
00018 #define __STRINGTRIEBUILDER_H__
00019
00020 #include "unicode/utypes.h"
00021 #include "unicode/uobject.h"
00022
00028
00030 struct UHashtable;
00031 typedef struct UHashtable UHashtable;
00033
00038 enum UStringTrieBuildOption {
00043 USTRINGTRIE_BUILD_FAST,
00054 USTRINGTRIE_BUILD_SMALL
00055 };
00056
00057 U_NAMESPACE_BEGIN
00058
00065 class U_COMMON_API StringTrieBuilder : public UObject {
00066 public:
00067 #ifndef U_HIDE_INTERNAL_API
00068
00069 static int32_t hashNode(const void *node);
00071 static UBool equalNodes(const void *left, const void *right);
00072 #endif
00073
00074 protected:
00075
00076
00078 StringTrieBuilder();
00080 virtual ~StringTrieBuilder();
00081
00082 #ifndef U_HIDE_INTERNAL_API
00083
00084 void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
00086 void deleteCompactBuilder();
00087
00089 void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
00090
00092 int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
00094 int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
00095 #endif
00096
00097 class Node;
00098
00099 #ifndef U_HIDE_INTERNAL_API
00100
00101 Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
00103 Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
00104 int32_t length, UErrorCode &errorCode);
00105 #endif
00106
00108 virtual int32_t getElementStringLength(int32_t i) const = 0;
00110 virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
00112 virtual int32_t getElementValue(int32_t i) const = 0;
00113
00114
00115
00117 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
00118
00119
00121 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
00123 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
00125 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
00126
00128 virtual UBool matchNodesCanHaveValues() const = 0;
00129
00131 virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
00133 virtual int32_t getMinLinearMatch() const = 0;
00135 virtual int32_t getMaxLinearMatchLength() const = 0;
00136
00137 #ifndef U_HIDE_INTERNAL_API
00138
00140 static const int32_t kMaxBranchLinearSubNodeLength=5;
00141
00142
00143
00145 static const int32_t kMaxSplitBranchLevels=14;
00146
00157 Node *registerNode(Node *newNode, UErrorCode &errorCode);
00168 Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
00169 #endif
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00189 UHashtable *nodes;
00190
00191
00192
00197 class Node : public UObject {
00198 public:
00199 Node(int32_t initialHash) : hash(initialHash), offset(0) {}
00200 inline int32_t hashCode() const { return hash; }
00201
00202 static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
00203
00204 virtual UBool operator==(const Node &other) const;
00205 inline UBool operator!=(const Node &other) const { return !operator==(other); }
00233 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00234
00235 virtual void write(StringTrieBuilder &builder) = 0;
00236
00237 inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
00238 StringTrieBuilder &builder) {
00239
00240
00241
00242
00243
00244 if(offset<0 && (offset<lastRight || firstRight<offset)) {
00245 write(builder);
00246 }
00247 }
00248 inline int32_t getOffset() const { return offset; }
00249 protected:
00250 int32_t hash;
00251 int32_t offset;
00252 };
00253
00254 #ifndef U_HIDE_INTERNAL_API
00255
00256
00257
00258
00259
00260
00262 class FinalValueNode : public Node {
00263 public:
00264 FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
00265 virtual UBool operator==(const Node &other) const;
00266 virtual void write(StringTrieBuilder &builder);
00267 protected:
00268 int32_t value;
00269 };
00270 #endif
00271
00272
00273
00277 class ValueNode : public Node {
00278 public:
00279 ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
00280 virtual UBool operator==(const Node &other) const;
00281 void setValue(int32_t v) {
00282 hasValue=TRUE;
00283 value=v;
00284 hash=hash*37u+v;
00285 }
00286 protected:
00287 UBool hasValue;
00288 int32_t value;
00289 };
00290
00291 #ifndef U_HIDE_INTERNAL_API
00292
00295 class IntermediateValueNode : public ValueNode {
00296 public:
00297 IntermediateValueNode(int32_t v, Node *nextNode)
00298 : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
00299 virtual UBool operator==(const Node &other) const;
00300 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00301 virtual void write(StringTrieBuilder &builder);
00302 protected:
00303 Node *next;
00304 };
00305 #endif
00306
00307
00308
00312 class LinearMatchNode : public ValueNode {
00313 public:
00314 LinearMatchNode(int32_t len, Node *nextNode)
00315 : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
00316 length(len), next(nextNode) {}
00317 virtual UBool operator==(const Node &other) const;
00318 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00319 protected:
00320 int32_t length;
00321 Node *next;
00322 };
00323
00324 #ifndef U_HIDE_INTERNAL_API
00325
00328 class BranchNode : public Node {
00329 public:
00330 BranchNode(int32_t initialHash) : Node(initialHash) {}
00331 protected:
00332 int32_t firstEdgeNumber;
00333 };
00334
00338 class ListBranchNode : public BranchNode {
00339 public:
00340 ListBranchNode() : BranchNode(0x444444), length(0) {}
00341 virtual UBool operator==(const Node &other) const;
00342 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00343 virtual void write(StringTrieBuilder &builder);
00344
00345 void add(int32_t c, int32_t value) {
00346 units[length]=(char16_t)c;
00347 equal[length]=NULL;
00348 values[length]=value;
00349 ++length;
00350 hash=(hash*37u+c)*37u+value;
00351 }
00352
00353 void add(int32_t c, Node *node) {
00354 units[length]=(char16_t)c;
00355 equal[length]=node;
00356 values[length]=0;
00357 ++length;
00358 hash=(hash*37u+c)*37u+hashCode(node);
00359 }
00360 protected:
00361 Node *equal[kMaxBranchLinearSubNodeLength];
00362 int32_t length;
00363 int32_t values[kMaxBranchLinearSubNodeLength];
00364 char16_t units[kMaxBranchLinearSubNodeLength];
00365 };
00366
00370 class SplitBranchNode : public BranchNode {
00371 public:
00372 SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
00373 : BranchNode(((0x555555u*37u+middleUnit)*37u+
00374 hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
00375 unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
00376 virtual UBool operator==(const Node &other) const;
00377 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00378 virtual void write(StringTrieBuilder &builder);
00379 protected:
00380 char16_t unit;
00381 Node *lessThan;
00382 Node *greaterOrEqual;
00383 };
00384
00385
00387 class BranchHeadNode : public ValueNode {
00388 public:
00389 BranchHeadNode(int32_t len, Node *subNode)
00390 : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
00391 length(len), next(subNode) {}
00392 virtual UBool operator==(const Node &other) const;
00393 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00394 virtual void write(StringTrieBuilder &builder);
00395 protected:
00396 int32_t length;
00397 Node *next;
00398 };
00399
00400 #endif
00402
00403
00404 virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
00405 Node *nextNode) const = 0;
00406
00408 virtual int32_t write(int32_t unit) = 0;
00410 virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
00412 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
00414 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
00416 virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
00417 };
00418
00419 U_NAMESPACE_END
00420
00421 #endif // __STRINGTRIEBUILDER_H__