00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017
00023 U_NAMESPACE_BEGIN
00024
00025
// Declared ahead of the class so that UnicodeSet can name this free function
// in a friend declaration further down.
00026 void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
// Forward declarations. Within this header these types appear only as
// pointer members, pointer/reference parameters, or friend declarations of
// UnicodeSet, so their full definitions are not required here.
00028 class BMPSet;
00029 class ParsePosition;
00030 class RBBIRuleScanner;
00031 class SymbolTable;
00032 class UnicodeSetStringSpan;
00033 class UVector;
00034 class RuleCharacterIterator;
00035
// UnicodeSet: a final (U_FINAL) class publicly derived from UnicodeFilter and
// exported through U_COMMON_API. This class body contains declarations only;
// the only member definitions visible in this header are the inline ones that
// follow the class.
// NOTE(review): remarks below about out-of-line members are inferred from
// their signatures; confirm them against the implementation file.
00276 class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
00277
// Data members. No access specifier precedes them, so they take the class
// default (private).
// NOTE(review): len/capacity/list and buffer/bufferCapacity look like a
// primary array of UChar32 values plus a second scratch array (compare
// swapBuffers() below) -- confirm.
00278 int32_t len;
00279 int32_t capacity;
00280 UChar32* list;
// bmpSet and stringSpan (below) are the two pointers tested by isFrozen().
00281 BMPSet *bmpSet;
00282 UChar32* buffer;
00283 int32_t bufferCapacity;
// NOTE(review): patLen/pat presumably hold pattern text maintained through
// setPattern()/releasePattern() -- confirm.
00284 int32_t patLen;
00285
00295 UChar *pat;
00296 UVector* strings;
00297 UnicodeSetStringSpan *stringSpan;
00298
00299 private:
// Bit values stored in fFlags. kIsBogus is the bit that isBogus() tests.
00300 enum {
00301 kIsBogus = 1
00302 };
00303 uint8_t fFlags;
00304 public:
// Defined inline after the class: returns the kIsBogus bit of fFlags.
00314 inline UBool isBogus(void) const;
00315
// NOTE(review): presumably puts the object into the state reported by
// isBogus() -- confirm.
00332 void setToBogus();
00333
00334 public:
00335
// Public constants 0 and 0x10ffff.
// NOTE(review): presumably the lowest and highest code point values a set
// can hold -- confirm.
00336 enum {
00341 MIN_VALUE = 0,
00342
00347 MAX_VALUE = 0x10ffff
00348 };
00349
00350
00351
00352
00353
00354 public:
00355
// ---- Construction, destruction, copying ----
// Default constructor.
00360 UnicodeSet();
00361
// Constructor taking a start/end pair of code point values.
00370 UnicodeSet(UChar32 start, UChar32 end);
00371
// The declarations inside this guard are omitted when U_HIDE_INTERNAL_API
// is defined.
00372 #ifndef U_HIDE_INTERNAL_API
00373
// Tag enum with a single value; it appears as a parameter type of the
// constructor below that takes a uint16_t buffer.
00376 enum ESerialization {
00377 kSerialized
00378 };
00379
// NOTE(review): presumably the counterpart of serialize() -- confirm.
// Failures are reported through status.
00390 UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
00391 ESerialization serialization, UErrorCode &status);
00392 #endif
00393
// Constructor taking a pattern string; errors are reported through status.
00402 UnicodeSet(const UnicodeString& pattern,
00403 UErrorCode& status);
00404
// Omitted when U_HIDE_INTERNAL_API is defined.
00405 #ifndef U_HIDE_INTERNAL_API
00406
// Pattern constructor that additionally takes an options word and an
// optional (pointer) SymbolTable.
00418 UnicodeSet(const UnicodeString& pattern,
00419 uint32_t options,
00420 const SymbolTable* symbols,
00421 UErrorCode& status);
00422 #endif
00423
// Pattern constructor that also takes a non-const ParsePosition reference.
// NOTE(review): presumably parsing starts at pos and pos is advanced past
// the consumed text -- confirm.
00437 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00438 uint32_t options,
00439 const SymbolTable* symbols,
00440 UErrorCode& status);
00441
// Copy constructor.
00446 UnicodeSet(const UnicodeSet& o);
00447
// Virtual destructor.
00452 virtual ~UnicodeSet();
00453
// Copy assignment.
00459 UnicodeSet& operator=(const UnicodeSet& o);
00460
// Equality comparison; operator!= below is defined inline as its negation.
00472 virtual UBool operator==(const UnicodeSet& o) const;
00473
00479 UBool operator!=(const UnicodeSet& o) const;
00480
// Returns a UnicodeFunctor pointer, not a UnicodeSet pointer.
// NOTE(review): presumably a heap copy owned by the caller -- confirm.
00490 virtual UnicodeFunctor* clone() const;
00491
00499 virtual int32_t hashCode(void) const;
00500
// ---- Conversion between USet (C API handle) and UnicodeSet ----
// All four are defined inline after the class as plain reinterpret_casts.
00509 inline static UnicodeSet *fromUSet(USet *uset);
00510
00519 inline static const UnicodeSet *fromUSet(const USet *uset);
00520
00528 inline USet *toUSet();
00529
00530
00538 inline const USet * toUSet() const;
00539
00540
00541
00542
00543
00544
// ---- Freezing ----
// Defined inline after the class: true when bmpSet or stringSpan is non-NULL.
00553 inline UBool isFrozen() const;
00554
// freeze() and cloneAsThawed() return UnicodeFunctor*, so callers that need
// a UnicodeSet* must cast the result.
00568 UnicodeFunctor *freeze();
00569
00578 UnicodeFunctor *cloneAsThawed() const;
00579
00580
00581
00582
00583
// ---- Assignment from a range or a pattern ----
00594 UnicodeSet& set(UChar32 start, UChar32 end);
00595
// Static check that takes a pattern and a position within it.
00601 static UBool resemblesPattern(const UnicodeString& pattern,
00602 int32_t pos);
00603
// applyPattern overloads mirror the pattern constructors above: same
// parameter lists, but they return *this-style references.
00616 UnicodeSet& applyPattern(const UnicodeString& pattern,
00617 UErrorCode& status);
00618
// Omitted when U_HIDE_INTERNAL_API is defined.
00619 #ifndef U_HIDE_INTERNAL_API
00620
00636 UnicodeSet& applyPattern(const UnicodeString& pattern,
00637 uint32_t options,
00638 const SymbolTable* symbols,
00639 UErrorCode& status);
00640 #endif
00641
00673 UnicodeSet& applyPattern(const UnicodeString& pattern,
00674 ParsePosition& pos,
00675 uint32_t options,
00676 const SymbolTable* symbols,
00677 UErrorCode& status);
00678
// Writes into the caller-supplied result string and returns a reference to
// a UnicodeString; escapeUnprintable defaults to FALSE.
00692 virtual UnicodeString& toPattern(UnicodeString& result,
00693 UBool escapeUnprintable = FALSE) const;
00694
// ---- Assignment from Unicode properties ----
// Property given as a UProperty enum plus an integer value.
00717 UnicodeSet& applyIntPropertyValue(UProperty prop,
00718 int32_t value,
00719 UErrorCode& ec);
00720
// Property and value both given as strings.
00750 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00751 const UnicodeString& value,
00752 UErrorCode& ec);
00753
// ---- Queries ----
00762 virtual int32_t size(void) const;
00763
00770 virtual UBool isEmpty(void) const;
00771
// contains(): overloads for a single code point, a range, and a string.
00779 virtual UBool contains(UChar32 c) const;
00780
00789 virtual UBool contains(UChar32 start, UChar32 end) const;
00790
00798 UBool contains(const UnicodeString& s) const;
00799
// containsAll(): overloads for another set and for a string.
00807 virtual UBool containsAll(const UnicodeSet& c) const;
00808
00816 UBool containsAll(const UnicodeString& s) const;
00817
// containsNone(): overloads for a range, another set, and a string.
00826 UBool containsNone(UChar32 start, UChar32 end) const;
00827
00835 UBool containsNone(const UnicodeSet& c) const;
00836
00844 UBool containsNone(const UnicodeString& s) const;
00845
// containsSome(): each overload is defined inline after the class as the
// negation of the containsNone() overload with the same parameters.
00854 inline UBool containsSome(UChar32 start, UChar32 end) const;
00855
00863 inline UBool containsSome(const UnicodeSet& s) const;
00864
00872 inline UBool containsSome(const UnicodeString& s) const;
00873
// ---- Span functions ----
// The UChar* and char* overloads are implemented out of line; the
// UnicodeString overloads are inline wrappers (defined after the class)
// that clamp their index argument and forward to the UChar* overloads.
00892 int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00893
00906 inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
00907
00925 int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00926
00940 inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
00941
00960 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00961
00979 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00980
// Virtual and non-const; offset is passed by non-const reference, so the
// callee can update it.
00985 virtual UMatchDegree matches(const Replaceable& text,
00986 int32_t& offset,
00987 int32_t limit,
00988 UBool incremental);
00989
00990 private:
// Private helpers.
// NOTE(review): matchRest is presumably used by matches(), and
// findCodePoint by the code point lookups -- confirm.
01013 static int32_t matchRest(const Replaceable& text,
01014 int32_t start, int32_t limit,
01015 const UnicodeString& s);
01016
01026 int32_t findCodePoint(UChar32 c) const;
01027
01028 public:
01029
// Takes the destination set by non-const reference.
01037 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
01038
// indexOf()/charAt() translate between a code point and an int32_t index.
01047 int32_t indexOf(UChar32 c) const;
01048
01058 UChar32 charAt(int32_t index) const;
01059
// ---- Modification: add ----
// Overloads for a range, a single code point, and a string. All modifiers
// in this class return UnicodeSet& so calls can be chained.
01074 virtual UnicodeSet& add(UChar32 start, UChar32 end);
01075
01083 UnicodeSet& add(UChar32 c);
01084
01096 UnicodeSet& add(const UnicodeString& s);
01097
01098 private:
// Private string helpers.
// NOTE(review): getSingleCP presumably reports whether s is exactly one
// code point -- confirm its return convention in the implementation.
01104 static int32_t getSingleCP(const UnicodeString& s);
01105
01106 void _add(const UnicodeString& s);
01107
01108 public:
// ---- String-argument bulk operations ----
01117 UnicodeSet& addAll(const UnicodeString& s);
01118
01127 UnicodeSet& retainAll(const UnicodeString& s);
01128
01137 UnicodeSet& complementAll(const UnicodeString& s);
01138
01147 UnicodeSet& removeAll(const UnicodeString& s);
01148
// Static factories returning raw pointers.
// NOTE(review): presumably the caller owns the returned object -- confirm.
01157 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01158
01159
01167 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01168
// ---- Modification: retain / remove / complement ----
// Each has a range overload and a single code point overload; remove and
// complement also have a string overload, and complement has a no-argument
// form.
01182 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01183
01184
01190 UnicodeSet& retain(UChar32 c);
01191
01205 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01206
01214 UnicodeSet& remove(UChar32 c);
01215
01225 UnicodeSet& remove(const UnicodeString& s);
01226
01234 virtual UnicodeSet& complement(void);
01235
01250 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01251
01259 UnicodeSet& complement(UChar32 c);
01260
01271 UnicodeSet& complement(const UnicodeString& s);
01272
// ---- Set-argument bulk operations ----
01285 virtual UnicodeSet& addAll(const UnicodeSet& c);
01286
01298 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01299
01311 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01312
01323 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01324
01331 virtual UnicodeSet& clear(void);
01332
// attribute is a plain int32_t here; the accepted values are not visible in
// this header.
01358 UnicodeSet& closeOver(int32_t attribute);
01359
01366 virtual UnicodeSet &removeAllStrings();
01367
// ---- Range iteration ----
// A count plus start/end accessors addressed by an int32_t index.
01375 virtual int32_t getRangeCount(void) const;
01376
01384 virtual UChar32 getRangeStart(int32_t index) const;
01385
01393 virtual UChar32 getRangeEnd(int32_t index) const;
01394
// Writes into a caller-supplied uint16_t array of destCapacity elements;
// errors are reported through ec.
01443 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01444
01451 virtual UnicodeSet& compact();
01452
// ---- Class identification ----
01464 static UClassID U_EXPORT2 getStaticClassID(void);
01465
01474 virtual UClassID getDynamicClassID(void) const;
01475
01476 private:
01477
01478
01479
// USetAccess is granted access to the private members, including the two
// string accessors declared next.
01480 friend class USetAccess;
01481
01482 int32_t getStringCount() const;
01483
01484 const UnicodeString* getString(int32_t index) const;
01485
01486
01487
01488
01489
01490 private:
01491
// Private virtual taking a uint8_t value.
// NOTE(review): presumably overrides a UnicodeFilter/UnicodeMatcher
// virtual -- confirm in the base class headers.
01497 virtual UBool matchesIndexValue(uint8_t v) const;
01498
01499 private:
// RBBIRuleScanner is granted access to the private members.
01500 friend class RBBIRuleScanner;
01501
01502
01503
01504
01505
// Private constructor overload; its second (UBool) parameter is unnamed in
// this declaration, which distinguishes it from the public copy constructor.
01506 UnicodeSet(const UnicodeSet& o, UBool );
01507
01508
01509
01510
01511
// ---- Private pattern parsing ----
01512 void applyPatternIgnoreSpace(const UnicodeString& pattern,
01513 ParsePosition& pos,
01514 const SymbolTable* symbols,
01515 UErrorCode& status);
01516
// caseClosure is a pointer to a UnicodeSet member function with the same
// signature as closeOver(int32_t).
01517 void applyPattern(RuleCharacterIterator& chars,
01518 const SymbolTable* symbols,
01519 UnicodeString& rebuiltPat,
01520 uint32_t options,
01521 UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
01522 UErrorCode& ec);
01523
01524
01525
01526
01527
// ---- Private storage management ----
// Both ensure* functions report failure through ec rather than a return
// value.
01528 void ensureCapacity(int32_t newLen, UErrorCode& ec);
01529
01530 void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
01531
01532 void swapBuffers(void);
01533
01534 UBool allocateStrings(UErrorCode &status);
01535
// ---- Private pattern generation ----
01536 UnicodeString& _toPattern(UnicodeString& result,
01537 UBool escapeUnprintable) const;
01538
01539 UnicodeString& _generatePattern(UnicodeString& result,
01540 UBool escapeUnprintable) const;
01541
// Static append helpers: one overload for a string, one for a code point.
01542 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01543
01544 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01545
01546
01547
01548
01549
// ---- Private array-level set operations ----
// Each takes another UChar32 array, its length, and an int8_t polarity.
// NOTE(review): the meaning of the polarity values is not visible here.
01550 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01551
01552 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01553
01554 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01555
// ---- Private property-pattern support ----
01561 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01562 int32_t pos);
01563
01564 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01565 int32_t iterOpts);
01566
01606 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01607 ParsePosition& ppos,
01608 UErrorCode &ec);
01609
01610 void applyPropertyPattern(RuleCharacterIterator& chars,
01611 UnicodeString& rebuiltPat,
01612 UErrorCode& ec);
01613
// The free function declared before the class is a friend; getInclusions
// takes the same (src, status) parameter pair.
01614 friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
01615 static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01616
// Callback type: receives a code point and an opaque context pointer and
// returns a UBool.
01621 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01622
// Takes a Filter callback, the context pointer handed to it, a src value
// and an error code.
01632 void applyFilter(Filter filter,
01633 void* context,
01634 int32_t src,
01635 UErrorCode &status);
01636
// NOTE(review): presumably these maintain the pat/patLen members -- confirm.
01640 void setPattern(const UnicodeString& newPat);
01644 void releasePattern();
01645
// UnicodeSetIterator is granted access to the private members.
01646 friend class UnicodeSetIterator;
01647 };
01648
01649
01650
// Inequality test, defined as the logical negation of operator==(o) so the
// two operators cannot disagree.
01651 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01652 return !operator==(o);
01653 }
01654
// Reports the set as frozen when either the bmpSet or the stringSpan pointer
// is non-NULL.
// NOTE(review): presumably freeze() is what populates those pointers --
// confirm in the implementation.
01655 inline UBool UnicodeSet::isFrozen() const {
01656 return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
01657 }
01658
// Range overload: the negation of containsNone(start, end).
01659 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01660 return !containsNone(start, end);
01661 }
01662
// Set overload: the negation of containsNone(s) for another UnicodeSet.
01663 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01664 return !containsNone(s);
01665 }
01666
// String overload: the negation of containsNone(s) for a UnicodeString.
01667 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01668 return !containsNone(s);
01669 }
01670
// Returns the kIsBogus bit of fFlags. kIsBogus is 1, so the result is 0 or 1.
01671 inline UBool UnicodeSet::isBogus() const {
01672 return (UBool)(fFlags & kIsBogus);
01673 }
01674
// Reinterprets a USet pointer as a UnicodeSet pointer. This is a bare
// reinterpret_cast: no check is made here that uset really refers to a
// UnicodeSet.
01675 inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
01676 return reinterpret_cast<UnicodeSet *>(uset);
01677 }
01678
// Const overload: reinterprets a const USet pointer as a const UnicodeSet
// pointer, again without any validation.
01679 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
01680 return reinterpret_cast<const UnicodeSet *>(uset);
01681 }
01682
// Reinterprets this object's address as a USet pointer (the inverse of
// fromUSet); no copy is made.
01683 inline USet *UnicodeSet::toUSet() {
01684 return reinterpret_cast<USet *>(this);
01685 }
01686
// Const overload: reinterprets this object's address as a const USet pointer.
01687 inline const USet *UnicodeSet::toUSet() const {
01688 return reinterpret_cast<const USet *>(this);
01689 }
01690
// UnicodeString convenience wrapper around span(const UChar*, int32_t, ...).
// start is first clamped into [0, s.length()]; the UChar* overload is then
// run on the text from start to the end of s, and start is added to its
// result so the returned value is an index into s rather than a count.
01691 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
01692 int32_t sLength=s.length();
// Clamp out-of-range start values instead of rejecting them.
01693 if(start<0) {
01694 start=0;
01695 } else if(start>sLength) {
01696 start=sLength;
01697 }
01698 return start+span(s.getBuffer()+start, sLength-start, spanCondition);
01699 }
01700
// UnicodeString convenience wrapper around spanBack(const UChar*, int32_t, ...).
// limit is first clamped into [0, s.length()]; the UChar* overload is then
// called with the start of the string's buffer and limit as the length, and
// its result is returned unchanged.
01701 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
01702 int32_t sLength=s.length();
// Clamp out-of-range limit values instead of rejecting them.
01703 if(limit<0) {
01704 limit=0;
01705 } else if(limit>sLength) {
01706 limit=sLength;
01707 }
01708 return spanBack(s.getBuffer(), limit, spanCondition);
01709 }
01710
01711 U_NAMESPACE_END
01712
01713 #endif