00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef UNICODESET_H
00014 #define UNICODESET_H
00015
00016 #include "unicode/ucpmap.h"
00017 #include "unicode/unifilt.h"
00018 #include "unicode/unistr.h"
00019 #include "unicode/uset.h"
00020
00026 U_NAMESPACE_BEGIN
00027
00028
// Forward declarations. This header names these classes only through
// pointers, references, or friend declarations, so it does not need their
// full definitions.
00029 class BMPSet;
00030 class ParsePosition;
00031 class RBBIRuleScanner;
00032 class SymbolTable;
00033 class UnicodeSetStringSpan;
00034 class UVector;
00035 class RuleCharacterIterator;
00036
/**
 * A set of Unicode code points (UChar32) and strings (UnicodeString) that
 * derives publicly from UnicodeFilter. The class is marked U_FINAL and is
 * exported with U_COMMON_API.
 *
 * What this header itself establishes:
 *  - `list` starts out pointing at the in-object array `stackList`
 *    (INITIAL_CAPACITY elements);
 *  - isFrozen() returns true when `bmpSet` or `stringSpan` is non-null;
 *  - isBogus() tests the kIsBogus bit of `fFlags`;
 *  - fromUSet()/toUSet() reinterpret a UnicodeSet pointer as the C handle
 *    type USet and back;
 *  - the containsSome() overloads are the negation of the matching
 *    containsNone() overloads, and operator!= is the negation of operator==.
 *
 * Almost every other member is only declared here and defined out of line.
 * Comments marked NOTE(review) are assumptions drawn from the signatures and
 * should be confirmed against the implementation.
 */
00277 class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
00278 private:
/** Element count of the in-object array `stackList`; also the initial value of `capacity`. */
00283 static constexpr int32_t INITIAL_CAPACITY = 25;
00284
/** Bit of `fFlags` that isBogus() tests. */
00285 static constexpr uint8_t kIsBogus = 1;
00286
/** Code point array; initially points at `stackList`. NOTE(review): presumably reallocated by ensureCapacity() when it outgrows the in-object storage - confirm. */
00287 UChar32* list = stackList;
/** NOTE(review): presumably the number of elements `list` can hold - confirm. */
00288 int32_t capacity = INITIAL_CAPACITY;
/** NOTE(review): presumably the number of used entries in `list`; note it starts at 1, not 0 - confirm what the initial entry is. */
00289 int32_t len = 1;
/** Bit flags; the only flag declared in this header is kIsBogus. */
00290 uint8_t fFlags = 0;
00291
/** When non-null, isFrozen() returns true. NOTE(review): presumably created by freeze() - confirm. */
00292 BMPSet *bmpSet = nullptr;
/** NOTE(review): presumably a scratch array that swapBuffers() exchanges with `list` - confirm. */
00293 UChar32* buffer = nullptr;
/** NOTE(review): presumably the element capacity of `buffer` (see ensureBufferCapacity()) - confirm. */
00294 int32_t bufferCapacity = 0;
00295
/** Stored pattern text, starts out null. NOTE(review): presumably owned by this object and managed by setPattern()/releasePattern() - confirm. */
00305 char16_t *pat = nullptr;
/** NOTE(review): presumably the length of `pat` in char16_t units - confirm. */
00306 int32_t patLen = 0;
00307
/** NOTE(review): presumably the container of the set's UnicodeString elements (see allocateStrings(), hasStrings(), getString()) - confirm. */
00308 UVector* strings = nullptr;
/** When non-null, isFrozen() returns true. NOTE(review): presumably a helper used by the span*() functions - confirm. */
00309 UnicodeSetStringSpan *stringSpan = nullptr;
00310
/** In-object storage that `list` points to right after construction. */
00316 UChar32 stackList[INITIAL_CAPACITY];
00317
00318 public:
/** Returns whether the kIsBogus bit is set in `fFlags` (defined inline after the class). */
00328 inline UBool isBogus(void) const;
00329
/** NOTE(review): presumably puts the set into the state that isBogus() reports - confirm. */
00346 void setToBogus();
00347
00348 public:
00349
/** Code point bounds: 0 and 0x10ffff. */
00350 enum {
00355 MIN_VALUE = 0,
00356
00361 MAX_VALUE = 0x10ffff
00362 };
00363
00364
00365
00366
00367
00368 public:
00369
// ---- Construction, assignment, comparison --------------------------------
// NOTE(review): constructor behavior is not visible in this header; the
// notes below only restate the parameter lists.

/** Default constructor. */
00374 UnicodeSet();
00375
/** Constructs from a pair of code points, named as a range (start, end). */
00384 UnicodeSet(UChar32 start, UChar32 end);
00385
// Internal API: compiled out when U_HIDE_INTERNAL_API is defined.
00386 #ifndef U_HIDE_INTERNAL_API
00387
/** Tag type that selects the constructor taking a uint16_t buffer. */
00390 enum ESerialization {
00391 kSerialized
00392 };
00393
/** Constructs from a uint16_t array of bufferLen elements; reports through status. NOTE(review): presumably the format written by serialize() - confirm. */
00404 UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
00405 ESerialization serialization, UErrorCode &status);
00406 #endif
00407
/** Constructs from a pattern string; reports through status. */
00416 UnicodeSet(const UnicodeString& pattern,
00417 UErrorCode& status);
00418
// Internal API: compiled out when U_HIDE_INTERNAL_API is defined.
00419 #ifndef U_HIDE_INTERNAL_API
00420
/** Constructs from a pattern string with option bits and an optional symbol table pointer; reports through status. */
00432 UnicodeSet(const UnicodeString& pattern,
00433 uint32_t options,
00434 const SymbolTable* symbols,
00435 UErrorCode& status);
00436 #endif
00437
/** As the constructor above, plus a ParsePosition passed by non-const reference. NOTE(review): presumably parsing starts at pos and pos is advanced - confirm. */
00451 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00452 uint32_t options,
00453 const SymbolTable* symbols,
00454 UErrorCode& status);
00455
/** Copy constructor. */
00460 UnicodeSet(const UnicodeSet& o);
00461
/** Virtual destructor. */
00466 virtual ~UnicodeSet();
00467
/** Copy assignment. */
00473 UnicodeSet& operator=(const UnicodeSet& o);
00474
/** Equality comparison with another UnicodeSet (virtual). */
00486 virtual UBool operator==(const UnicodeSet& o) const;
00487
/** Negation of operator== (defined inline after the class). */
00493 inline UBool operator!=(const UnicodeSet& o) const;
00494
/** Returns a UnicodeFunctor pointer. NOTE(review): presumably a heap-allocated copy owned by the caller - confirm. */
00504 virtual UnicodeFunctor* clone() const;
00505
/** Returns an int32_t hash value for this set. */
00513 virtual int32_t hashCode(void) const;
00514
// ---- Conversion to and from the C handle type USet ------------------------
// All four are plain reinterpret_casts of the pointer (defined inline after
// the class); no object is created or copied.

00523 inline static UnicodeSet *fromUSet(USet *uset);
00524
00533 inline static const UnicodeSet *fromUSet(const USet *uset);
00534
00542 inline USet *toUSet();
00543
00544
00552 inline const USet * toUSet() const;
00553
00554
00555
00556
00557
00558
// ---- Freezing --------------------------------------------------------------

/** True when `bmpSet` or `stringSpan` is non-null (defined inline after the class). */
00567 inline UBool isFrozen() const;
00568
/** NOTE(review): presumably makes the set immutable and returns this object - confirm. */
00582 UnicodeFunctor *freeze();
00583
/** NOTE(review): presumably returns a new, non-frozen copy owned by the caller - confirm. */
00592 UnicodeFunctor *cloneAsThawed() const;
00593
00594
00595
00596
00597
// ---- Whole-set assignment from ranges, patterns and properties ------------
// Each function returns UnicodeSet&. NOTE(review): presumably *this, to allow
// chaining - confirm.

00607 UnicodeSet& set(UChar32 start, UChar32 end);
00608
/** Static check on a pattern string at index pos. NOTE(review): presumably a cheap syntactic test, not a full parse - confirm. */
00614 static UBool resemblesPattern(const UnicodeString& pattern,
00615 int32_t pos);
00616
/** Applies a pattern string; reports through status. */
00629 UnicodeSet& applyPattern(const UnicodeString& pattern,
00630 UErrorCode& status);
00631
// Internal API: compiled out when U_HIDE_INTERNAL_API is defined.
00632 #ifndef U_HIDE_INTERNAL_API
00633
/** Applies a pattern string with option bits and an optional symbol table pointer. */
00649 UnicodeSet& applyPattern(const UnicodeString& pattern,
00650 uint32_t options,
00651 const SymbolTable* symbols,
00652 UErrorCode& status);
00653 #endif
00654
/** As above, plus a ParsePosition passed by non-const reference. */
00686 UnicodeSet& applyPattern(const UnicodeString& pattern,
00687 ParsePosition& pos,
00688 uint32_t options,
00689 const SymbolTable* symbols,
00690 UErrorCode& status);
00691
/** Writes into result and returns a UnicodeString reference; escapeUnprintable defaults to FALSE. */
00705 virtual UnicodeString& toPattern(UnicodeString& result,
00706 UBool escapeUnprintable = FALSE) const;
00707
/** Applies a (UProperty, integer value) pair; reports through ec. */
00730 UnicodeSet& applyIntPropertyValue(UProperty prop,
00731 int32_t value,
00732 UErrorCode& ec);
00733
/** Applies a property given as two strings (prop, value); reports through ec. */
00763 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00764 const UnicodeString& value,
00765 UErrorCode& ec);
00766
// ---- Queries ---------------------------------------------------------------

00775 virtual int32_t size(void) const;
00776
00783 virtual UBool isEmpty(void) const;
00784
00792 virtual UBool contains(UChar32 c) const;
00793
00802 virtual UBool contains(UChar32 start, UChar32 end) const;
00803
00811 UBool contains(const UnicodeString& s) const;
00812
00820 virtual UBool containsAll(const UnicodeSet& c) const;
00821
00829 UBool containsAll(const UnicodeString& s) const;
00830
00839 UBool containsNone(UChar32 start, UChar32 end) const;
00840
00848 UBool containsNone(const UnicodeSet& c) const;
00849
00857 UBool containsNone(const UnicodeString& s) const;
00858
// The three containsSome() overloads return !containsNone(...) with the same
// arguments (defined inline after the class).
00867 inline UBool containsSome(UChar32 start, UChar32 end) const;
00868
00876 inline UBool containsSome(const UnicodeSet& s) const;
00877
00885 inline UBool containsSome(const UnicodeString& s) const;
00886
// ---- Spanning --------------------------------------------------------------
// NOTE(review): the meaning of the returned int32_t for the pointer-based
// overloads is not visible in this header - confirm.

00905 int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
00906
/** Pins start into [0, s.length()], calls the char16_t overload on the rest of s, and returns start plus that result (defined inline after the class). */
00919 inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
00920
00938 int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
00939
/** Pins limit into [0, s.length()] and calls the char16_t overload on s's buffer with that length (defined inline after the class). */
00953 inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
00954
00973 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00974
00992 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00993
/** Non-const virtual; offset is passed by non-const reference. NOTE(review): presumably an override of a UnicodeFilter/UnicodeMatcher virtual - confirm. */
00998 virtual UMatchDegree matches(const Replaceable& text,
00999 int32_t& offset,
01000 int32_t limit,
01001 UBool incremental);
01002
01003 private:
/** Static helper taking a text, a [start, limit] pair and a string. NOTE(review): presumably used by matches() - confirm. */
01026 static int32_t matchRest(const Replaceable& text,
01027 int32_t start, int32_t limit,
01028 const UnicodeString& s);
01029
/** NOTE(review): presumably returns an index into `list` for c - confirm the exact contract. */
01039 int32_t findCodePoint(UChar32 c) const;
01040
01041 public:
01042
01050 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
01051
01060 int32_t indexOf(UChar32 c) const;
01061
01071 UChar32 charAt(int32_t index) const;
01072
// ---- Modification ------------------------------------------------------------
// add / retain / remove / complement come in code-point-range, single
// code point, string and set flavours. Each returns UnicodeSet&.
// NOTE(review): presumably *this - confirm.

01087 virtual UnicodeSet& add(UChar32 start, UChar32 end);
01088
01096 UnicodeSet& add(UChar32 c);
01097
01109 UnicodeSet& add(const UnicodeString& s);
01110
01111 private:
/** Static helper on a string. NOTE(review): the name suggests it yields a code point when s is a single code point; the failure value is not visible here - confirm. */
01117 static int32_t getSingleCP(const UnicodeString& s);
01118
/** Private worker taking a string. NOTE(review): presumably adds s to `strings` - confirm. */
01119 void _add(const UnicodeString& s);
01120
01121 public:
01130 UnicodeSet& addAll(const UnicodeString& s);
01131
01140 UnicodeSet& retainAll(const UnicodeString& s);
01141
01150 UnicodeSet& complementAll(const UnicodeString& s);
01151
01160 UnicodeSet& removeAll(const UnicodeString& s);
01161
/** Static factory returning a raw UnicodeSet pointer. NOTE(review): presumably caller-owned - confirm. */
01170 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01171
01172
/** Static factory returning a raw UnicodeSet pointer. NOTE(review): presumably caller-owned - confirm. */
01180 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01181
01195 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01196
01197
01203 UnicodeSet& retain(UChar32 c);
01204
01218 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01219
01227 UnicodeSet& remove(UChar32 c);
01228
01238 UnicodeSet& remove(const UnicodeString& s);
01239
01247 virtual UnicodeSet& complement(void);
01248
01263 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01264
01272 UnicodeSet& complement(UChar32 c);
01273
01284 UnicodeSet& complement(const UnicodeString& s);
01285
01298 virtual UnicodeSet& addAll(const UnicodeSet& c);
01299
01311 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01312
01324 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01325
01336 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01337
01344 virtual UnicodeSet& clear(void);
01345
/** Takes an integer attribute. NOTE(review): presumably one of the USET_* case-closure options from uset.h - confirm. */
01371 UnicodeSet& closeOver(int32_t attribute);
01372
01379 virtual UnicodeSet &removeAllStrings();
01380
// ---- Range iteration ------------------------------------------------------------

01388 virtual int32_t getRangeCount(void) const;
01389
01397 virtual UChar32 getRangeStart(int32_t index) const;
01398
01406 virtual UChar32 getRangeEnd(int32_t index) const;
01407
/** Writes into a caller-supplied uint16_t array of destCapacity elements; reports through ec. NOTE(review): the meaning of the return value is not visible here - confirm. */
01456 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01457
01464 virtual UnicodeSet& compact();
01465
// ---- Class identification ----------------------------------------------------

01477 static UClassID U_EXPORT2 getStaticClassID(void);
01478
01487 virtual UClassID getDynamicClassID(void) const;
01488
01489 private:
01490
01491
01492
// USetAccess is granted access to private members.
01493 friend class USetAccess;
01494
/** Returns a pointer to a string by index. NOTE(review): presumably an element of `strings` - confirm. */
01495 const UnicodeString* getString(int32_t index) const;
01496
01497
01498
01499
01500
01501 private:
01502
/** Private virtual taking a byte value. NOTE(review): presumably overrides a base-class hook - confirm. */
01508 virtual UBool matchesIndexValue(uint8_t v) const;
01509
01510 private:
// RBBIRuleScanner is granted access to private members.
01511 friend class RBBIRuleScanner;
01512
01513
01514
01515
01516
// Private copy helpers. The second constructor parameter is unnamed, so it
// only selects this overload over the public copy constructor.
01517 UnicodeSet(const UnicodeSet& o, UBool );
01518 UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);
01519
01520
01521
01522
01523
// Private pattern-parsing workers behind the public applyPattern() overloads.
01524 void applyPatternIgnoreSpace(const UnicodeString& pattern,
01525 ParsePosition& pos,
01526 const SymbolTable* symbols,
01527 UErrorCode& status);
01528
/** Takes a character iterator, an output pattern string, a pointer to a member function used as caseClosure, and a depth counter. NOTE(review): depth suggests recursion on nested sets - confirm. */
01529 void applyPattern(RuleCharacterIterator& chars,
01530 const SymbolTable* symbols,
01531 UnicodeString& rebuiltPat,
01532 uint32_t options,
01533 UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
01534 int32_t depth,
01535 UErrorCode& ec);
01536
01537
01538
01539
01540
// Storage management. The two ensure*() functions return bool.
// NOTE(review): presumably false signals allocation failure - confirm.
01541 static int32_t nextCapacity(int32_t minCapacity);
01542
01543 bool ensureCapacity(int32_t newLen);
01544
01545 bool ensureBufferCapacity(int32_t newLen);
01546
01547 void swapBuffers(void);
01548
// Helpers for the `strings` member.
01549 UBool allocateStrings(UErrorCode &status);
01550 UBool hasStrings() const;
01551 int32_t stringsSize() const;
01552 UBool stringsContains(const UnicodeString &s) const;
01553
// Pattern generation helpers. NOTE(review): presumably the workers behind
// toPattern() - confirm.
01554 UnicodeString& _toPattern(UnicodeString& result,
01555 UBool escapeUnprintable) const;
01556
01557 UnicodeString& _generatePattern(UnicodeString& result,
01558 UBool escapeUnprintable) const;
01559
01560 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01561
01562 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01563
01564
01565
01566
01567
// Array-level workers: each takes a UChar32 array, its length, and a
// polarity value. NOTE(review): the encoding of polarity is not visible in
// this header - confirm.
01568 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01569
01570 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01571
01572 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01573
// Property-pattern helpers, in string+position and iterator flavours.
01579 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01580 int32_t pos);
01581
01582 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01583 int32_t iterOpts);
01584
01624 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01625 ParsePosition& ppos,
01626 UErrorCode &ec);
01627
01628 void applyPropertyPattern(RuleCharacterIterator& chars,
01629 UnicodeString& rebuiltPat,
01630 UErrorCode& ec);
01631
/** Static lookup by integer source id; returns a pointer to a const set. NOTE(review): presumably a shared object the caller must not delete - confirm. */
01632 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01633
/** Predicate callback type: receives a code point and an opaque context pointer, returns UBool. */
01638 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01639
/** Takes a Filter callback, its context pointer, and a set of inclusions. NOTE(review): presumably resets this set to the code points accepted by filter - confirm. */
01649 void applyFilter(Filter filter,
01650 void* context,
01651 const UnicodeSet* inclusions,
01652 UErrorCode &status);
01653
01654 #ifndef U_HIDE_DRAFT_API // Skipped: ucpmap.h is draft only.
/** UCPMap-based overload, compiled only when draft API is not hidden. */
01655 void applyIntPropertyValue(const UCPMap *map,
01656 UCPMapValueFilter *filter, const void *context,
01657 UErrorCode &errorCode);
01658 #endif
01659
/** Convenience overload: forwards newPat's buffer and length to the pointer-based setPattern(). */
01663 void setPattern(const UnicodeString& newPat) {
01664 setPattern(newPat.getBuffer(), newPat.length());
01665 }
/** Pointer + length overload. NOTE(review): presumably copies the text into `pat`/`patLen` - confirm. */
01666 void setPattern(const char16_t *newPat, int32_t newPatLen);
/** NOTE(review): presumably frees `pat` and resets `patLen` - confirm. */
01670 void releasePattern();
01671
// UnicodeSetIterator is granted access to private members.
01672 friend class UnicodeSetIterator;
01673 };
01674
01675
01676
01677 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01678 return !operator==(o);
01679 }
01680
01681 inline UBool UnicodeSet::isFrozen() const {
01682 return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
01683 }
01684
01685 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01686 return !containsNone(start, end);
01687 }
01688
01689 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01690 return !containsNone(s);
01691 }
01692
01693 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01694 return !containsNone(s);
01695 }
01696
01697 inline UBool UnicodeSet::isBogus() const {
01698 return (UBool)(fFlags & kIsBogus);
01699 }
01700
01701 inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
01702 return reinterpret_cast<UnicodeSet *>(uset);
01703 }
01704
01705 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
01706 return reinterpret_cast<const UnicodeSet *>(uset);
01707 }
01708
01709 inline USet *UnicodeSet::toUSet() {
01710 return reinterpret_cast<USet *>(this);
01711 }
01712
01713 inline const USet *UnicodeSet::toUSet() const {
01714 return reinterpret_cast<const USet *>(this);
01715 }
01716
01717 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
01718 int32_t sLength=s.length();
01719 if(start<0) {
01720 start=0;
01721 } else if(start>sLength) {
01722 start=sLength;
01723 }
01724 return start+span(s.getBuffer()+start, sLength-start, spanCondition);
01725 }
01726
01727 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
01728 int32_t sLength=s.length();
01729 if(limit<0) {
01730 limit=0;
01731 } else if(limit>sLength) {
01732 limit=sLength;
01733 }
01734 return spanBack(s.getBuffer(), limit, spanCondition);
01735 }
01736
01737 U_NAMESPACE_END
01738
01739 #endif