00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef UNICODESET_H
00014 #define UNICODESET_H
00015
00016 #include "unicode/ucpmap.h"
00017 #include "unicode/unifilt.h"
00018 #include "unicode/unistr.h"
00019 #include "unicode/uset.h"
00020
00026 U_NAMESPACE_BEGIN
00027
00028
// Forward declarations for types this header refers to only through
// pointers, references, or friend declarations (listed alphabetically).
class BMPSet;
class CharacterProperties;
class ParsePosition;
class RBBIRuleScanner;
class RuleCharacterIterator;
class SymbolTable;
class UnicodeSetStringSpan;
class UVector;
00037
00278 class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
00279
00280 int32_t len;
00281 int32_t capacity;
00282 UChar32* list;
00283 BMPSet *bmpSet;
00284 UChar32* buffer;
00285 int32_t bufferCapacity;
00286 int32_t patLen;
00287
00297 char16_t *pat;
00298 UVector* strings;
00299 UnicodeSetStringSpan *stringSpan;
00300
00301 private:
00302 enum {
00303 kIsBogus = 1
00304 };
00305 uint8_t fFlags;
00306 public:
00316 inline UBool isBogus(void) const;
00317
00334 void setToBogus();
00335
00336 public:
00337
00338 enum {
00343 MIN_VALUE = 0,
00344
00349 MAX_VALUE = 0x10ffff
00350 };
00351
00352
00353
00354
00355
00356 public:
00357
00362 UnicodeSet();
00363
00372 UnicodeSet(UChar32 start, UChar32 end);
00373
00374 #ifndef U_HIDE_INTERNAL_API
00375
00378 enum ESerialization {
00379 kSerialized
00380 };
00381
00392 UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
00393 ESerialization serialization, UErrorCode &status);
00394 #endif
00395
00404 UnicodeSet(const UnicodeString& pattern,
00405 UErrorCode& status);
00406
00407 #ifndef U_HIDE_INTERNAL_API
00408
00420 UnicodeSet(const UnicodeString& pattern,
00421 uint32_t options,
00422 const SymbolTable* symbols,
00423 UErrorCode& status);
00424 #endif
00425
00439 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00440 uint32_t options,
00441 const SymbolTable* symbols,
00442 UErrorCode& status);
00443
00448 UnicodeSet(const UnicodeSet& o);
00449
00454 virtual ~UnicodeSet();
00455
00461 UnicodeSet& operator=(const UnicodeSet& o);
00462
00474 virtual UBool operator==(const UnicodeSet& o) const;
00475
00481 UBool operator!=(const UnicodeSet& o) const;
00482
00492 virtual UnicodeFunctor* clone() const;
00493
00501 virtual int32_t hashCode(void) const;
00502
00511 inline static UnicodeSet *fromUSet(USet *uset);
00512
00521 inline static const UnicodeSet *fromUSet(const USet *uset);
00522
00530 inline USet *toUSet();
00531
00532
00540 inline const USet * toUSet() const;
00541
00542
00543
00544
00545
00546
00555 inline UBool isFrozen() const;
00556
00570 UnicodeFunctor *freeze();
00571
00580 UnicodeFunctor *cloneAsThawed() const;
00581
00582
00583
00584
00585
00595 UnicodeSet& set(UChar32 start, UChar32 end);
00596
00602 static UBool resemblesPattern(const UnicodeString& pattern,
00603 int32_t pos);
00604
00617 UnicodeSet& applyPattern(const UnicodeString& pattern,
00618 UErrorCode& status);
00619
00620 #ifndef U_HIDE_INTERNAL_API
00621
00637 UnicodeSet& applyPattern(const UnicodeString& pattern,
00638 uint32_t options,
00639 const SymbolTable* symbols,
00640 UErrorCode& status);
00641 #endif
00642
00674 UnicodeSet& applyPattern(const UnicodeString& pattern,
00675 ParsePosition& pos,
00676 uint32_t options,
00677 const SymbolTable* symbols,
00678 UErrorCode& status);
00679
00693 virtual UnicodeString& toPattern(UnicodeString& result,
00694 UBool escapeUnprintable = FALSE) const;
00695
00718 UnicodeSet& applyIntPropertyValue(UProperty prop,
00719 int32_t value,
00720 UErrorCode& ec);
00721
00751 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00752 const UnicodeString& value,
00753 UErrorCode& ec);
00754
00763 virtual int32_t size(void) const;
00764
00771 virtual UBool isEmpty(void) const;
00772
00780 virtual UBool contains(UChar32 c) const;
00781
00790 virtual UBool contains(UChar32 start, UChar32 end) const;
00791
00799 UBool contains(const UnicodeString& s) const;
00800
00808 virtual UBool containsAll(const UnicodeSet& c) const;
00809
00817 UBool containsAll(const UnicodeString& s) const;
00818
00827 UBool containsNone(UChar32 start, UChar32 end) const;
00828
00836 UBool containsNone(const UnicodeSet& c) const;
00837
00845 UBool containsNone(const UnicodeString& s) const;
00846
00855 inline UBool containsSome(UChar32 start, UChar32 end) const;
00856
00864 inline UBool containsSome(const UnicodeSet& s) const;
00865
00873 inline UBool containsSome(const UnicodeString& s) const;
00874
00893 int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
00894
00907 inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
00908
00926 int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
00927
00941 inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
00942
00961 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00962
00980 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00981
00986 virtual UMatchDegree matches(const Replaceable& text,
00987 int32_t& offset,
00988 int32_t limit,
00989 UBool incremental);
00990
00991 private:
01014 static int32_t matchRest(const Replaceable& text,
01015 int32_t start, int32_t limit,
01016 const UnicodeString& s);
01017
01027 int32_t findCodePoint(UChar32 c) const;
01028
01029 public:
01030
01038 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
01039
01048 int32_t indexOf(UChar32 c) const;
01049
01059 UChar32 charAt(int32_t index) const;
01060
01075 virtual UnicodeSet& add(UChar32 start, UChar32 end);
01076
01084 UnicodeSet& add(UChar32 c);
01085
01097 UnicodeSet& add(const UnicodeString& s);
01098
01099 private:
01105 static int32_t getSingleCP(const UnicodeString& s);
01106
01107 void _add(const UnicodeString& s);
01108
01109 public:
01118 UnicodeSet& addAll(const UnicodeString& s);
01119
01128 UnicodeSet& retainAll(const UnicodeString& s);
01129
01138 UnicodeSet& complementAll(const UnicodeString& s);
01139
01148 UnicodeSet& removeAll(const UnicodeString& s);
01149
01158 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01159
01160
01168 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01169
01183 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01184
01185
01191 UnicodeSet& retain(UChar32 c);
01192
01206 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01207
01215 UnicodeSet& remove(UChar32 c);
01216
01226 UnicodeSet& remove(const UnicodeString& s);
01227
01235 virtual UnicodeSet& complement(void);
01236
01251 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01252
01260 UnicodeSet& complement(UChar32 c);
01261
01272 UnicodeSet& complement(const UnicodeString& s);
01273
01286 virtual UnicodeSet& addAll(const UnicodeSet& c);
01287
01299 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01300
01312 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01313
01324 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01325
01332 virtual UnicodeSet& clear(void);
01333
01359 UnicodeSet& closeOver(int32_t attribute);
01360
01367 virtual UnicodeSet &removeAllStrings();
01368
01376 virtual int32_t getRangeCount(void) const;
01377
01385 virtual UChar32 getRangeStart(int32_t index) const;
01386
01394 virtual UChar32 getRangeEnd(int32_t index) const;
01395
01444 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01445
01452 virtual UnicodeSet& compact();
01453
01465 static UClassID U_EXPORT2 getStaticClassID(void);
01466
01475 virtual UClassID getDynamicClassID(void) const;
01476
01477 private:
01478
01479
01480
01481 friend class USetAccess;
01482
01483 int32_t getStringCount() const;
01484
01485 const UnicodeString* getString(int32_t index) const;
01486
01487
01488
01489
01490
01491 private:
01492
01498 virtual UBool matchesIndexValue(uint8_t v) const;
01499
01500 private:
01501 friend class RBBIRuleScanner;
01502
01503
01504
01505
01506
01507 UnicodeSet(const UnicodeSet& o, UBool );
01508 UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);
01509
01510
01511
01512
01513
01514 void applyPatternIgnoreSpace(const UnicodeString& pattern,
01515 ParsePosition& pos,
01516 const SymbolTable* symbols,
01517 UErrorCode& status);
01518
01519 void applyPattern(RuleCharacterIterator& chars,
01520 const SymbolTable* symbols,
01521 UnicodeString& rebuiltPat,
01522 uint32_t options,
01523 UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
01524 int32_t depth,
01525 UErrorCode& ec);
01526
01527
01528
01529
01530
01531 void ensureCapacity(int32_t newLen, UErrorCode& ec);
01532
01533 void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
01534
01535 void swapBuffers(void);
01536
01537 UBool allocateStrings(UErrorCode &status);
01538
01539 UnicodeString& _toPattern(UnicodeString& result,
01540 UBool escapeUnprintable) const;
01541
01542 UnicodeString& _generatePattern(UnicodeString& result,
01543 UBool escapeUnprintable) const;
01544
01545 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01546
01547 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01548
01549
01550
01551
01552
01553 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01554
01555 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01556
01557 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01558
01564 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01565 int32_t pos);
01566
01567 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01568 int32_t iterOpts);
01569
01609 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01610 ParsePosition& ppos,
01611 UErrorCode &ec);
01612
01613 void applyPropertyPattern(RuleCharacterIterator& chars,
01614 UnicodeString& rebuiltPat,
01615 UErrorCode& ec);
01616
01617 friend class CharacterProperties;
01618 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01619
01624 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01625
01635 void applyFilter(Filter filter,
01636 void* context,
01637 const UnicodeSet* inclusions,
01638 UErrorCode &status);
01639
01640 #ifndef U_HIDE_DRAFT_API // Skipped: ucpmap.h is draft only.
01641 void applyIntPropertyValue(const UCPMap *map,
01642 UCPMapValueFilter *filter, const void *context,
01643 UErrorCode &errorCode);
01644 #endif
01645
01649 void setPattern(const UnicodeString& newPat);
01653 void releasePattern();
01654
01655 friend class UnicodeSetIterator;
01656 };
01657
01658
01659
01660 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01661 return !operator==(o);
01662 }
01663
01664 inline UBool UnicodeSet::isFrozen() const {
01665 return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
01666 }
01667
01668 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01669 return !containsNone(start, end);
01670 }
01671
01672 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01673 return !containsNone(s);
01674 }
01675
01676 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01677 return !containsNone(s);
01678 }
01679
01680 inline UBool UnicodeSet::isBogus() const {
01681 return (UBool)(fFlags & kIsBogus);
01682 }
01683
01684 inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
01685 return reinterpret_cast<UnicodeSet *>(uset);
01686 }
01687
01688 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
01689 return reinterpret_cast<const UnicodeSet *>(uset);
01690 }
01691
01692 inline USet *UnicodeSet::toUSet() {
01693 return reinterpret_cast<USet *>(this);
01694 }
01695
01696 inline const USet *UnicodeSet::toUSet() const {
01697 return reinterpret_cast<const USet *>(this);
01698 }
01699
01700 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
01701 int32_t sLength=s.length();
01702 if(start<0) {
01703 start=0;
01704 } else if(start>sLength) {
01705 start=sLength;
01706 }
01707 return start+span(s.getBuffer()+start, sLength-start, spanCondition);
01708 }
01709
01710 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
01711 int32_t sLength=s.length();
01712 if(limit<0) {
01713 limit=0;
01714 } else if(limit>sLength) {
01715 limit=sLength;
01716 }
01717 return spanBack(s.getBuffer(), limit, spanCondition);
01718 }
01719
01720 U_NAMESPACE_END
01721
01722 #endif