00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef __STRINGTRIEBUILDER_H__
00016 #define __STRINGTRIEBUILDER_H__
00017
00018 #include "unicode/utypes.h"
00019 #include "unicode/uobject.h"
00020
00026
00027 struct UHashtable;
00028 typedef struct UHashtable UHashtable;
00029
00034 enum UStringTrieBuildOption {
00039 USTRINGTRIE_BUILD_FAST,
00050 USTRINGTRIE_BUILD_SMALL
00051 };
00052
00053 U_NAMESPACE_BEGIN
00054
00061 class U_COMMON_API StringTrieBuilder : public UObject {
00062 public:
00063 #ifndef U_HIDE_INTERNAL_API
00064
00065 static UBool hashNode(const void *node);
00067 static UBool equalNodes(const void *left, const void *right);
00068 #endif
00069
00070 protected:
00071
00072
00074 StringTrieBuilder();
00076 virtual ~StringTrieBuilder();
00077
00078 #ifndef U_HIDE_INTERNAL_API
00079
00080 void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
00082 void deleteCompactBuilder();
00083
00085 void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
00086
00088 int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
00090 int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
00091 #endif
00092
00093 class Node;
00094
00095 #ifndef U_HIDE_INTERNAL_API
00096
00097 Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
00099 Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
00100 int32_t length, UErrorCode &errorCode);
00101 #endif
00102
00104 virtual int32_t getElementStringLength(int32_t i) const = 0;
00106 virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;
00108 virtual int32_t getElementValue(int32_t i) const = 0;
00109
00110
00111
00113 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
00114
00115
00117 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
00119 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
00121 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;
00122
00124 virtual UBool matchNodesCanHaveValues() const = 0;
00125
00127 virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
00129 virtual int32_t getMinLinearMatch() const = 0;
00131 virtual int32_t getMaxLinearMatchLength() const = 0;
00132
00133 #ifndef U_HIDE_INTERNAL_API
00134
00136 static const int32_t kMaxBranchLinearSubNodeLength=5;
00137
00138
00139
00141 static const int32_t kMaxSplitBranchLevels=14;
00142
00153 Node *registerNode(Node *newNode, UErrorCode &errorCode);
00164 Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
00165 #endif
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00185 UHashtable *nodes;
00186
00187 #ifndef U_HIDE_INTERNAL_API
00188
00189 class Node : public UObject {
00190 public:
00191 Node(int32_t initialHash) : hash(initialHash), offset(0) {}
00192 inline int32_t hashCode() const { return hash; }
00193
00194 static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
00195
00196 virtual UBool operator==(const Node &other) const;
00197 inline UBool operator!=(const Node &other) const { return !operator==(other); }
00225 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00226
00227 virtual void write(StringTrieBuilder &builder) = 0;
00228
00229 inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
00230 StringTrieBuilder &builder) {
00231
00232
00233
00234
00235
00236 if(offset<0 && (offset<lastRight || firstRight<offset)) {
00237 write(builder);
00238 }
00239 }
00240 inline int32_t getOffset() const { return offset; }
00241 protected:
00242 int32_t hash;
00243 int32_t offset;
00244 };
00245
00246
00247
00248
00249
00250
00251
00253 class FinalValueNode : public Node {
00254 public:
00255 FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
00256 virtual UBool operator==(const Node &other) const;
00257 virtual void write(StringTrieBuilder &builder);
00258 protected:
00259 int32_t value;
00260 };
00261
00265 class ValueNode : public Node {
00266 public:
00267 ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
00268 virtual UBool operator==(const Node &other) const;
00269 void setValue(int32_t v) {
00270 hasValue=TRUE;
00271 value=v;
00272 hash=hash*37+v;
00273 }
00274 protected:
00275 UBool hasValue;
00276 int32_t value;
00277 };
00278
00282 class IntermediateValueNode : public ValueNode {
00283 public:
00284 IntermediateValueNode(int32_t v, Node *nextNode)
00285 : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
00286 virtual UBool operator==(const Node &other) const;
00287 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00288 virtual void write(StringTrieBuilder &builder);
00289 protected:
00290 Node *next;
00291 };
00292
00296 class LinearMatchNode : public ValueNode {
00297 public:
00298 LinearMatchNode(int32_t len, Node *nextNode)
00299 : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
00300 length(len), next(nextNode) {}
00301 virtual UBool operator==(const Node &other) const;
00302 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00303 protected:
00304 int32_t length;
00305 Node *next;
00306 };
00307
00311 class BranchNode : public Node {
00312 public:
00313 BranchNode(int32_t initialHash) : Node(initialHash) {}
00314 protected:
00315 int32_t firstEdgeNumber;
00316 };
00317
00321 class ListBranchNode : public BranchNode {
00322 public:
00323 ListBranchNode() : BranchNode(0x444444), length(0) {}
00324 virtual UBool operator==(const Node &other) const;
00325 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00326 virtual void write(StringTrieBuilder &builder);
00327
00328 void add(int32_t c, int32_t value) {
00329 units[length]=(UChar)c;
00330 equal[length]=NULL;
00331 values[length]=value;
00332 ++length;
00333 hash=(hash*37+c)*37+value;
00334 }
00335
00336 void add(int32_t c, Node *node) {
00337 units[length]=(UChar)c;
00338 equal[length]=node;
00339 values[length]=0;
00340 ++length;
00341 hash=(hash*37+c)*37+hashCode(node);
00342 }
00343 protected:
00344 Node *equal[kMaxBranchLinearSubNodeLength];
00345 int32_t length;
00346 int32_t values[kMaxBranchLinearSubNodeLength];
00347 UChar units[kMaxBranchLinearSubNodeLength];
00348 };
00349
00353 class SplitBranchNode : public BranchNode {
00354 public:
00355 SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
00356 : BranchNode(((0x555555*37+middleUnit)*37+
00357 hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
00358 unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
00359 virtual UBool operator==(const Node &other) const;
00360 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00361 virtual void write(StringTrieBuilder &builder);
00362 protected:
00363 UChar unit;
00364 Node *lessThan;
00365 Node *greaterOrEqual;
00366 };
00367
00368
00370 class BranchHeadNode : public ValueNode {
00371 public:
00372 BranchHeadNode(int32_t len, Node *subNode)
00373 : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
00374 length(len), next(subNode) {}
00375 virtual UBool operator==(const Node &other) const;
00376 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00377 virtual void write(StringTrieBuilder &builder);
00378 protected:
00379 int32_t length;
00380 Node *next;
00381 };
00382 #endif
00383
00385 virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
00386 Node *nextNode) const = 0;
00387
00389 virtual int32_t write(int32_t unit) = 0;
00391 virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
00393 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
00395 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
00397 virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
00398 };
00399
00400 U_NAMESPACE_END
00401
00402 #endif // __STRINGTRIEBUILDER_H__