00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef UNICODESET_H
00014 #define UNICODESET_H
00015
00016 #include "unicode/unifilt.h"
00017 #include "unicode/unistr.h"
00018 #include "unicode/uset.h"
00019
00025 U_NAMESPACE_BEGIN
00026
00027
// Prototype of a free function using the U_CALLCONV calling convention.
// The same signature is declared a friend inside class UnicodeSet, which is
// why it has to be visible before the class body.
00028 void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
// Forward declarations: in this header these classes appear only as pointer
// or reference types, or in friend declarations, so incomplete types suffice.
00030 class BMPSet;
00031 class ParsePosition;
00032 class RBBIRuleScanner;
00033 class SymbolTable;
00034 class UnicodeSetStringSpan;
00035 class UVector;
00036 class RuleCharacterIterator;
00037
/**
 * UnicodeSet is declared as a final (U_FINAL) class that derives publicly
 * from UnicodeFilter and is exported through U_COMMON_API.
 *
 * NOTE(review): this listing contains declarations only. The notes below are
 * limited to what the signatures and the inline definitions that follow the
 * class show. Anything marked "presumably" is an assumption that must be
 * confirmed against the implementation file and the ICU API reference.
 */
00278 class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
00279
// Data members. They precede any access specifier, so they are private.
// presumably `list` stores the set's code point data, with `len` entries in
// use out of `capacity` allocated -- TODO confirm in the implementation.
00280 int32_t len;
00281 int32_t capacity;
00282 UChar32* list;
// A non-NULL bmpSet makes isFrozen() return true.
00283 BMPSet *bmpSet;
// presumably a scratch array sized by bufferCapacity; see the private
// ensureBufferCapacity() and swapBuffers() declarations -- TODO confirm.
00284 UChar32* buffer;
00285 int32_t bufferCapacity;
// presumably the length of `pat` in char16_t units -- TODO confirm.
00286 int32_t patLen;
00287
// presumably pattern text managed through setPattern()/releasePattern().
00297 char16_t *pat;
// presumably the container behind getStringCount()/getString(); see also
// allocateStrings() -- TODO confirm.
00298 UVector* strings;
// A non-NULL stringSpan makes isFrozen() return true.
00299 UnicodeSetStringSpan *stringSpan;
00300
00301 private:
// Bit mask(s) stored in fFlags.
00302 enum {
00303 kIsBogus = 1
00304 };
00305 uint8_t fFlags;
00306 public:
// Reports the kIsBogus bit of fFlags (inline definition follows the class).
00316 inline UBool isBogus(void) const;
00317
// presumably puts the set into the state reported by isBogus() -- TODO confirm.
00334 void setToBogus();
00335
00336 public:
00337
// Public constants: MIN_VALUE is 0 and MAX_VALUE is 0x10ffff.
// presumably the lowest and highest code point values a set accepts.
00338 enum {
00343 MIN_VALUE = 0,
00344
00349 MAX_VALUE = 0x10ffff
00350 };
00351
00352
00353
00354
00355
00356 public:
00357
// ---- Construction, destruction, assignment ----
// Default constructor.
00362 UnicodeSet();
00363
// Constructs from two code points; presumably the inclusive range
// [start, end] -- TODO confirm.
00372 UnicodeSet(UChar32 start, UChar32 end);
00373
// The declarations inside this guard are omitted when U_HIDE_INTERNAL_API
// is defined.
00374 #ifndef U_HIDE_INTERNAL_API
00375
// Tag type that selects the constructor taking a uint16_t array.
00378 enum ESerialization {
00379 kSerialized
00380 };
00381
// Constructs from a uint16_t array of bufferLen elements; presumably the
// format written by serialize() -- TODO confirm. Errors go through `status`.
00392 UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
00393 ESerialization serialization, UErrorCode &status);
00394 #endif
00395
// Constructs from a pattern string; errors go through `status`.
00404 UnicodeSet(const UnicodeString& pattern,
00405 UErrorCode& status);
00406
00407 #ifndef U_HIDE_INTERNAL_API
00408
// Pattern constructor that also takes option bits and a SymbolTable pointer.
00420 UnicodeSet(const UnicodeString& pattern,
00421 uint32_t options,
00422 const SymbolTable* symbols,
00423 UErrorCode& status);
00424 #endif
00425
// Pattern constructor that additionally takes a ParsePosition by non-const
// reference; presumably parsing starts at `pos` and `pos` is updated.
00439 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00440 uint32_t options,
00441 const SymbolTable* symbols,
00442 UErrorCode& status);
00443
// Copy constructor.
00448 UnicodeSet(const UnicodeSet& o);
00449
// Virtual destructor.
00454 virtual ~UnicodeSet();
00455
// Copy assignment.
00461 UnicodeSet& operator=(const UnicodeSet& o);
00462
// Equality comparison; operator!= is defined inline as its negation.
00474 virtual UBool operator==(const UnicodeSet& o) const;
00475
00481 UBool operator!=(const UnicodeSet& o) const;
00482
// Returns a UnicodeFunctor pointer; presumably a heap-allocated copy owned
// by the caller -- TODO confirm.
00492 virtual UnicodeFunctor* clone() const;
00493
00501 virtual int32_t hashCode(void) const;
00502
// ---- Conversions between UnicodeSet and the C type USet ----
// All four are defined inline after the class as plain reinterpret_casts.
00511 inline static UnicodeSet *fromUSet(USet *uset);
00512
00521 inline static const UnicodeSet *fromUSet(const USet *uset);
00522
00530 inline USet *toUSet();
00531
00532
00540 inline const USet * toUSet() const;
00541
00542
00543
00544
00545
00546
// ---- Freezing ----
// isFrozen() is true when bmpSet or stringSpan is non-NULL (inline below).
00555 inline UBool isFrozen() const;
00556
// presumably makes the set immutable and returns this object -- TODO confirm.
00570 UnicodeFunctor *freeze();
00571
// presumably returns a modifiable copy -- TODO confirm ownership.
00580 UnicodeFunctor *cloneAsThawed() const;
00581
00582
00583
00584
00585
// ---- Whole-set assignment and pattern handling ----
// presumably replaces the contents with the range [start, end].
00596 UnicodeSet& set(UChar32 start, UChar32 end);
00597
// Static test on the text of `pattern` at index `pos`.
00603 static UBool resemblesPattern(const UnicodeString& pattern,
00604 int32_t pos);
00605
// applyPattern overloads mirror the pattern constructors above and return
// a UnicodeSet reference (presumably *this).
00618 UnicodeSet& applyPattern(const UnicodeString& pattern,
00619 UErrorCode& status);
00620
00621 #ifndef U_HIDE_INTERNAL_API
00622
00638 UnicodeSet& applyPattern(const UnicodeString& pattern,
00639 uint32_t options,
00640 const SymbolTable* symbols,
00641 UErrorCode& status);
00642 #endif
00643
00675 UnicodeSet& applyPattern(const UnicodeString& pattern,
00676 ParsePosition& pos,
00677 uint32_t options,
00678 const SymbolTable* symbols,
00679 UErrorCode& status);
00680
// Writes into `result` and returns a UnicodeString reference;
// escapeUnprintable defaults to FALSE.
00694 virtual UnicodeString& toPattern(UnicodeString& result,
00695 UBool escapeUnprintable = FALSE) const;
00696
// Property-based modifiers; errors go through `ec`.
00719 UnicodeSet& applyIntPropertyValue(UProperty prop,
00720 int32_t value,
00721 UErrorCode& ec);
00722
00752 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00753 const UnicodeString& value,
00754 UErrorCode& ec);
00755
// ---- Queries ----
00764 virtual int32_t size(void) const;
00765
00772 virtual UBool isEmpty(void) const;
00773
// contains/containsAll/containsNone overloads take a code point, a code
// point pair, a string, or another set.
00781 virtual UBool contains(UChar32 c) const;
00782
00791 virtual UBool contains(UChar32 start, UChar32 end) const;
00792
00800 UBool contains(const UnicodeString& s) const;
00801
00809 virtual UBool containsAll(const UnicodeSet& c) const;
00810
00818 UBool containsAll(const UnicodeString& s) const;
00819
00828 UBool containsNone(UChar32 start, UChar32 end) const;
00829
00837 UBool containsNone(const UnicodeSet& c) const;
00838
00846 UBool containsNone(const UnicodeString& s) const;
00847
// Each containsSome overload is defined inline as the negation of the
// containsNone overload with the same parameters.
00856 inline UBool containsSome(UChar32 start, UChar32 end) const;
00857
00865 inline UBool containsSome(const UnicodeSet& s) const;
00866
00874 inline UBool containsSome(const UnicodeString& s) const;
00875
// ---- Spanning ----
// The pointer overloads take a text pointer, a length and a
// USetSpanCondition; the UnicodeString overloads are inline wrappers that
// clamp the index to [0, s.length()] and forward to the pointer overloads.
00894 int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
00895
00908 inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
00909
00927 int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
00928
00942 inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
00943
// Variants taking `const char *`; presumably UTF-8 text, per the names.
00962 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00963
00981 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00982
// Matching against a Replaceable; `offset` is taken by non-const reference.
// presumably it is advanced on a match -- TODO confirm.
00987 virtual UMatchDegree matches(const Replaceable& text,
00988 int32_t& offset,
00989 int32_t limit,
00990 UBool incremental);
00991
00992 private:
// Private static helper used for matching a string `s` against `text`.
01015 static int32_t matchRest(const Replaceable& text,
01016 int32_t start, int32_t limit,
01017 const UnicodeString& s);
01018
// Private lookup helper for a single code point.
01028 int32_t findCodePoint(UChar32 c) const;
01029
01030 public:
01031
01039 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
01040
// Index/code point mapping; presumably inverse operations -- TODO confirm.
01049 int32_t indexOf(UChar32 c) const;
01050
01060 UChar32 charAt(int32_t index) const;
01061
// ---- Modifiers ----
// Every modifier below returns a UnicodeSet reference (presumably *this).
01076 virtual UnicodeSet& add(UChar32 start, UChar32 end);
01077
01085 UnicodeSet& add(UChar32 c);
01086
01098 UnicodeSet& add(const UnicodeString& s);
01099
01100 private:
// Private static helper; presumably returns the single code point that
// makes up `s`, or a sentinel otherwise -- TODO confirm.
01106 static int32_t getSingleCP(const UnicodeString& s);
01107
01108 void _add(const UnicodeString& s);
01109
01110 public:
// String-argument forms of addAll/retainAll/complementAll/removeAll.
01119 UnicodeSet& addAll(const UnicodeString& s);
01120
01129 UnicodeSet& retainAll(const UnicodeString& s);
01130
01139 UnicodeSet& complementAll(const UnicodeString& s);
01140
01149 UnicodeSet& removeAll(const UnicodeString& s);
01150
// Static factories returning a raw UnicodeSet pointer; presumably the
// caller owns the result -- TODO confirm.
01159 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01160
01161
01169 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01170
01184 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01185
01186
01192 UnicodeSet& retain(UChar32 c);
01193
01207 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01208
01216 UnicodeSet& remove(UChar32 c);
01217
01227 UnicodeSet& remove(const UnicodeString& s);
01228
01236 virtual UnicodeSet& complement(void);
01237
01252 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01253
01261 UnicodeSet& complement(UChar32 c);
01262
01273 UnicodeSet& complement(const UnicodeString& s);
01274
// Set-argument forms of addAll/retainAll/removeAll/complementAll.
01287 virtual UnicodeSet& addAll(const UnicodeSet& c);
01288
01300 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01301
01313 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01314
01325 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01326
01333 virtual UnicodeSet& clear(void);
01334
// `attribute` is an int32_t; presumably a bit set of USET_* option
// constants -- TODO confirm.
01360 UnicodeSet& closeOver(int32_t attribute);
01361
01368 virtual UnicodeSet &removeAllStrings();
01369
// ---- Range enumeration ----
01377 virtual int32_t getRangeCount(void) const;
01378
01386 virtual UChar32 getRangeStart(int32_t index) const;
01387
01395 virtual UChar32 getRangeEnd(int32_t index) const;
01396
// Writes into a caller-supplied uint16_t array of destCapacity elements;
// errors go through `ec`.
01445 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01446
01453 virtual UnicodeSet& compact();
01454
// ---- Class identification ----
01466 static UClassID U_EXPORT2 getStaticClassID(void);
01467
01476 virtual UClassID getDynamicClassID(void) const;
01477
01478 private:
01479
01480
01481
// USetAccess is granted access to the private string accessors below.
01482 friend class USetAccess;
01483
01484 int32_t getStringCount() const;
01485
01486 const UnicodeString* getString(int32_t index) const;
01487
01488
01489
01490
01491
01492 private:
01493
// Private virtual taking a uint8_t; presumably overrides a base-class hook.
01499 virtual UBool matchesIndexValue(uint8_t v) const;
01500
01501 private:
01502 friend class RBBIRuleScanner;
01503
01504
01505
01506
01507
// Private constructor; the unnamed UBool parameter only distinguishes it
// from the public copy constructor.
01508 UnicodeSet(const UnicodeSet& o, UBool );
01509
01510
01511
01512
01513
// ---- Private pattern-parsing helpers ----
01514 void applyPatternIgnoreSpace(const UnicodeString& pattern,
01515 ParsePosition& pos,
01516 const SymbolTable* symbols,
01517 UErrorCode& status);
01518
// `caseClosure` is a pointer to a UnicodeSet member function with the
// signature of closeOver(int32_t).
01519 void applyPattern(RuleCharacterIterator& chars,
01520 const SymbolTable* symbols,
01521 UnicodeString& rebuiltPat,
01522 uint32_t options,
01523 UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
01524 UErrorCode& ec);
01525
01526
01527
01528
01529
// ---- Private storage management ----
01530 void ensureCapacity(int32_t newLen, UErrorCode& ec);
01531
01532 void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
01533
01534 void swapBuffers(void);
01535
01536 UBool allocateStrings(UErrorCode &status);
01537
// ---- Private pattern-generation helpers ----
01538 UnicodeString& _toPattern(UnicodeString& result,
01539 UBool escapeUnprintable) const;
01540
01541 UnicodeString& _generatePattern(UnicodeString& result,
01542 UBool escapeUnprintable) const;
01543
01544 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01545
01546 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01547
01548
01549
01550
01551
// ---- Private operations on raw UChar32 arrays ----
// Each takes an array, its length and an int8_t `polarity`.
01552 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01553
01554 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01555
01556 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01557
// ---- Private property-pattern helpers ----
01563 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01564 int32_t pos);
01565
01566 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01567 int32_t iterOpts);
01568
01608 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01609 ParsePosition& ppos,
01610 UErrorCode &ec);
01611
01612 void applyPropertyPattern(RuleCharacterIterator& chars,
01613 UnicodeString& rebuiltPat,
01614 UErrorCode& ec);
01615
// The free function UnicodeSet_initInclusion is a friend; getInclusions
// takes the same (src, status) parameters and returns a const set pointer.
01616 friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
01617 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01618
// Callback type: receives a code point and an opaque context pointer and
// returns a UBool.
01623 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01624
// Takes a Filter callback, its context pointer and a `src` value.
01634 void applyFilter(Filter filter,
01635 void* context,
01636 int32_t src,
01637 UErrorCode &status);
01638
// presumably these manage the pat/patLen members -- TODO confirm.
01642 void setPattern(const UnicodeString& newPat);
01646 void releasePattern();
01647
01648 friend class UnicodeSetIterator;
01649 };
01650
01651
01652
/**
 * Inequality test, defined as the logical negation of operator==(o).
 * @param o the set to compare with
 * @return the negated result of operator==(o)
 */
01653 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01654     return static_cast<UBool>(!this->operator==(o));
01655 }
01656
/**
 * Reports whether this set is frozen.
 * @return true when either bmpSet or stringSpan is non-null
 */
01657 inline UBool UnicodeSet::isFrozen() const {
01658     return static_cast<UBool>(!(bmpSet == nullptr && stringSpan == nullptr));
01659 }
01660
/**
 * Code point pair overload: the negation of containsNone(start, end).
 * @param start first code point passed to containsNone
 * @param end   second code point passed to containsNone
 * @return true when containsNone(start, end) is false
 */
01661 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01662     return static_cast<UBool>(!this->containsNone(start, end));
01663 }
01664
/**
 * Set overload: the negation of containsNone(s).
 * @param s the set passed to containsNone
 * @return true when containsNone(s) is false
 */
01665 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01666     return static_cast<UBool>(!this->containsNone(s));
01667 }
01668
/**
 * String overload: the negation of containsNone(s).
 * @param s the string passed to containsNone
 * @return true when containsNone(s) is false
 */
01669 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01670     return static_cast<UBool>(!this->containsNone(s));
01671 }
01672
/**
 * Reports the bogus flag.
 * @return the kIsBogus bit of fFlags, converted to UBool
 */
01673 inline UBool UnicodeSet::isBogus() const {
01674     return static_cast<UBool>(this->fFlags & kIsBogus);
01675 }
01676
/**
 * Reinterprets a USet pointer as a UnicodeSet pointer.
 * No check or conversion is performed; the pointer value is only re-typed.
 * @param uset the pointer to re-type
 * @return the same address typed as UnicodeSet *
 */
01677 inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
01678     return reinterpret_cast<UnicodeSet*>(uset);
01679 }
01680
/**
 * Const overload: reinterprets a const USet pointer as a const UnicodeSet
 * pointer. No check or conversion is performed.
 * @param uset the pointer to re-type
 * @return the same address typed as const UnicodeSet *
 */
01681 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
01682     return reinterpret_cast<const UnicodeSet*>(uset);
01683 }
01684
/**
 * Exposes this object through the USet pointer type.
 * @return `this`, reinterpreted as USet *
 */
01685 inline USet *UnicodeSet::toUSet() {
01686     return reinterpret_cast<USet*>(this);
01687 }
01688
/**
 * Const overload: exposes this object through the const USet pointer type.
 * @return `this`, reinterpreted as const USet *
 */
01689 inline const USet *UnicodeSet::toUSet() const {
01690     return reinterpret_cast<const USet*>(this);
01691 }
01692
/**
 * UnicodeString convenience wrapper around the pointer-based span().
 * The start index is clamped into [0, s.length()], the pointer overload is
 * run on the text from that index on, and the clamped index is added to
 * its result.
 * @param s             the text to scan
 * @param start         index to begin at; negative values are treated as 0,
 *                      values past the end as s.length()
 * @param spanCondition forwarded unchanged to the pointer overload
 * @return the clamped start plus the pointer overload's result
 */
01693 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
01694     const int32_t textLength = s.length();
01695     const int32_t from =
01696         (start < 0) ? 0
01697         : (start > textLength) ? textLength
01698         : start;
01699     const char16_t *tail = s.getBuffer() + from;
01700     return from + span(tail, textLength - from, spanCondition);
01701 }
01702
/**
 * UnicodeString convenience wrapper around the pointer-based spanBack().
 * The limit index is clamped into [0, s.length()] and passed as the length
 * argument together with the start of the string's buffer.
 * @param s             the text to scan
 * @param limit         end index of the text to consider; negative values
 *                      are treated as 0, values past the end as s.length()
 * @param spanCondition forwarded unchanged to the pointer overload
 * @return the pointer overload's result
 */
01703 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
01704     const int32_t textLength = s.length();
01705     const int32_t clamped =
01706         (limit < 0) ? 0
01707         : (limit > textLength) ? textLength
01708         : limit;
01709     const char16_t *text = s.getBuffer();
01710     return spanBack(text, clamped, spanCondition);
01711 }
01712
01713 U_NAMESPACE_END
01714
01715 #endif