00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef __STRINGTRIEBUILDER_H__
00018 #define __STRINGTRIEBUILDER_H__
00019
00020 #include "unicode/utypes.h"
00021 #include "unicode/uobject.h"
00022
00028
00029 struct UHashtable;
00030 typedef struct UHashtable UHashtable;
00031
00036 enum UStringTrieBuildOption {
00041 USTRINGTRIE_BUILD_FAST,
00052 USTRINGTRIE_BUILD_SMALL
00053 };
00054
00055 U_NAMESPACE_BEGIN
00056
00063 class U_COMMON_API StringTrieBuilder : public UObject {
00064 public:
00065 #ifndef U_HIDE_INTERNAL_API
00066
00067 static UBool hashNode(const void *node);
00069 static UBool equalNodes(const void *left, const void *right);
00070 #endif
00071
00072 protected:
00073
00074
00076 StringTrieBuilder();
00078 virtual ~StringTrieBuilder();
00079
00080 #ifndef U_HIDE_INTERNAL_API
00081
00082 void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
00084 void deleteCompactBuilder();
00085
00087 void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
00088
00090 int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
00092 int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
00093 #endif
00094
00095 class Node;
00096
00097 #ifndef U_HIDE_INTERNAL_API
00098
00099 Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
00101 Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
00102 int32_t length, UErrorCode &errorCode);
00103 #endif
00104
00106 virtual int32_t getElementStringLength(int32_t i) const = 0;
00108 virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
00110 virtual int32_t getElementValue(int32_t i) const = 0;
00111
00112
00113
00115 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
00116
00117
00119 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
00121 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
00123 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
00124
00126 virtual UBool matchNodesCanHaveValues() const = 0;
00127
00129 virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
00131 virtual int32_t getMinLinearMatch() const = 0;
00133 virtual int32_t getMaxLinearMatchLength() const = 0;
00134
00135 #ifndef U_HIDE_INTERNAL_API
00136
00138 static const int32_t kMaxBranchLinearSubNodeLength=5;
00139
00140
00141
00143 static const int32_t kMaxSplitBranchLevels=14;
00144
00155 Node *registerNode(Node *newNode, UErrorCode &errorCode);
00166 Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
00167 #endif
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00187 UHashtable *nodes;
00188
00189
00190
00192 class Node : public UObject {
00193 public:
00194 Node(int32_t initialHash) : hash(initialHash), offset(0) {}
00195 inline int32_t hashCode() const { return hash; }
00196
00197 static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
00198
00199 virtual UBool operator==(const Node &other) const;
00200 inline UBool operator!=(const Node &other) const { return !operator==(other); }
00228 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00229
00230 virtual void write(StringTrieBuilder &builder) = 0;
00231
00232 inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
00233 StringTrieBuilder &builder) {
00234
00235
00236
00237
00238
00239 if(offset<0 && (offset<lastRight || firstRight<offset)) {
00240 write(builder);
00241 }
00242 }
00243 inline int32_t getOffset() const { return offset; }
00244 protected:
00245 int32_t hash;
00246 int32_t offset;
00247 };
00248
00249 #ifndef U_HIDE_INTERNAL_API
00250
00251
00252
00253
00254
00255
00257 class FinalValueNode : public Node {
00258 public:
00259 FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
00260 virtual UBool operator==(const Node &other) const;
00261 virtual void write(StringTrieBuilder &builder);
00262 protected:
00263 int32_t value;
00264 };
00265 #endif
00266
00267
00268
00272 class ValueNode : public Node {
00273 public:
00274 ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
00275 virtual UBool operator==(const Node &other) const;
00276 void setValue(int32_t v) {
00277 hasValue=TRUE;
00278 value=v;
00279 hash=hash*37u+v;
00280 }
00281 protected:
00282 UBool hasValue;
00283 int32_t value;
00284 };
00285
00286 #ifndef U_HIDE_INTERNAL_API
00287
00290 class IntermediateValueNode : public ValueNode {
00291 public:
00292 IntermediateValueNode(int32_t v, Node *nextNode)
00293 : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
00294 virtual UBool operator==(const Node &other) const;
00295 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00296 virtual void write(StringTrieBuilder &builder);
00297 protected:
00298 Node *next;
00299 };
00300 #endif
00301
00302
00303
00307 class LinearMatchNode : public ValueNode {
00308 public:
00309 LinearMatchNode(int32_t len, Node *nextNode)
00310 : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
00311 length(len), next(nextNode) {}
00312 virtual UBool operator==(const Node &other) const;
00313 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00314 protected:
00315 int32_t length;
00316 Node *next;
00317 };
00318
00319 #ifndef U_HIDE_INTERNAL_API
00320
00323 class BranchNode : public Node {
00324 public:
00325 BranchNode(int32_t initialHash) : Node(initialHash) {}
00326 protected:
00327 int32_t firstEdgeNumber;
00328 };
00329
00333 class ListBranchNode : public BranchNode {
00334 public:
00335 ListBranchNode() : BranchNode(0x444444), length(0) {}
00336 virtual UBool operator==(const Node &other) const;
00337 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00338 virtual void write(StringTrieBuilder &builder);
00339
00340 void add(int32_t c, int32_t value) {
00341 units[length]=(char16_t)c;
00342 equal[length]=NULL;
00343 values[length]=value;
00344 ++length;
00345 hash=(hash*37u+c)*37u+value;
00346 }
00347
00348 void add(int32_t c, Node *node) {
00349 units[length]=(char16_t)c;
00350 equal[length]=node;
00351 values[length]=0;
00352 ++length;
00353 hash=(hash*37u+c)*37u+hashCode(node);
00354 }
00355 protected:
00356 Node *equal[kMaxBranchLinearSubNodeLength];
00357 int32_t length;
00358 int32_t values[kMaxBranchLinearSubNodeLength];
00359 char16_t units[kMaxBranchLinearSubNodeLength];
00360 };
00361
00365 class SplitBranchNode : public BranchNode {
00366 public:
00367 SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
00368 : BranchNode(((0x555555u*37u+middleUnit)*37u+
00369 hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
00370 unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
00371 virtual UBool operator==(const Node &other) const;
00372 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00373 virtual void write(StringTrieBuilder &builder);
00374 protected:
00375 char16_t unit;
00376 Node *lessThan;
00377 Node *greaterOrEqual;
00378 };
00379
00380
00382 class BranchHeadNode : public ValueNode {
00383 public:
00384 BranchHeadNode(int32_t len, Node *subNode)
00385 : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
00386 length(len), next(subNode) {}
00387 virtual UBool operator==(const Node &other) const;
00388 virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
00389 virtual void write(StringTrieBuilder &builder);
00390 protected:
00391 int32_t length;
00392 Node *next;
00393 };
00394 #endif
00395
00397 virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
00398 Node *nextNode) const = 0;
00399
00401 virtual int32_t write(int32_t unit) = 0;
00403 virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
00405 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
00407 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
00409 virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
00410 };
00411
00412 U_NAMESPACE_END
00413
00414 #endif // __STRINGTRIEBUILDER_H__