00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef __NORMALIZER2_H__
00020 #define __NORMALIZER2_H__
00021
00027 #include "unicode/utypes.h"
00028
00029 #if U_SHOW_CPLUSPLUS_API
00030
00031 #if !UCONFIG_NO_NORMALIZATION
00032
00033 #include "unicode/stringpiece.h"
00034 #include "unicode/uniset.h"
00035 #include "unicode/unistr.h"
00036 #include "unicode/unorm2.h"
00037
00038 U_NAMESPACE_BEGIN
00039
00040 class ByteSink;
00041
00085 class U_COMMON_API Normalizer2 : public UObject {
00086 public:
00091 ~Normalizer2();
00092
00104 static const Normalizer2 *
00105 getNFCInstance(UErrorCode &errorCode);
00106
00118 static const Normalizer2 *
00119 getNFDInstance(UErrorCode &errorCode);
00120
00132 static const Normalizer2 *
00133 getNFKCInstance(UErrorCode &errorCode);
00134
00146 static const Normalizer2 *
00147 getNFKDInstance(UErrorCode &errorCode);
00148
00160 static const Normalizer2 *
00161 getNFKCCasefoldInstance(UErrorCode &errorCode);
00162
00184 static const Normalizer2 *
00185 getInstance(const char *packageName,
00186 const char *name,
00187 UNormalization2Mode mode,
00188 UErrorCode &errorCode);
00189
00200 UnicodeString
00201 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
00202 UnicodeString result;
00203 normalize(src, result, errorCode);
00204 return result;
00205 }
00219 virtual UnicodeString &
00220 normalize(const UnicodeString &src,
00221 UnicodeString &dest,
00222 UErrorCode &errorCode) const = 0;
00223
00248 virtual void
00249 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
00250 Edits *edits, UErrorCode &errorCode) const;
00251
00266 virtual UnicodeString &
00267 normalizeSecondAndAppend(UnicodeString &first,
00268 const UnicodeString &second,
00269 UErrorCode &errorCode) const = 0;
00284 virtual UnicodeString &
00285 append(UnicodeString &first,
00286 const UnicodeString &second,
00287 UErrorCode &errorCode) const = 0;
00288
00302 virtual UBool
00303 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
00304
00329 virtual UBool
00330 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
00331
00347 virtual UChar32
00348 composePair(UChar32 a, UChar32 b) const;
00349
00358 virtual uint8_t
00359 getCombiningClass(UChar32 c) const;
00360
00375 virtual UBool
00376 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00398 virtual UBool
00399 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
00400
00401
00417 virtual UNormalizationCheckResult
00418 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00419
00442 virtual int32_t
00443 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00444
00458 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
00459
00474 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
00475
00489 virtual UBool isInert(UChar32 c) const = 0;
00490 };
00491
00503 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
00504 public:
00515 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
00516 norm2(n2), set(filterSet) {}
00517
00522 ~FilteredNormalizer2();
00523
00537 virtual UnicodeString &
00538 normalize(const UnicodeString &src,
00539 UnicodeString &dest,
00540 UErrorCode &errorCode) const U_OVERRIDE;
00541
00566 virtual void
00567 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
00568 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
00569
00584 virtual UnicodeString &
00585 normalizeSecondAndAppend(UnicodeString &first,
00586 const UnicodeString &second,
00587 UErrorCode &errorCode) const U_OVERRIDE;
00602 virtual UnicodeString &
00603 append(UnicodeString &first,
00604 const UnicodeString &second,
00605 UErrorCode &errorCode) const U_OVERRIDE;
00606
00618 virtual UBool
00619 getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
00620
00632 virtual UBool
00633 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
00634
00645 virtual UChar32
00646 composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
00647
00656 virtual uint8_t
00657 getCombiningClass(UChar32 c) const U_OVERRIDE;
00658
00670 virtual UBool
00671 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
00693 virtual UBool
00694 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
00706 virtual UNormalizationCheckResult
00707 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
00719 virtual int32_t
00720 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
00721
00730 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
00731
00740 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
00741
00749 virtual UBool isInert(UChar32 c) const U_OVERRIDE;
00750 private:
00751 UnicodeString &
00752 normalize(const UnicodeString &src,
00753 UnicodeString &dest,
00754 USetSpanCondition spanCondition,
00755 UErrorCode &errorCode) const;
00756
00757 void
00758 normalizeUTF8(uint32_t options, const char *src, int32_t length,
00759 ByteSink &sink, Edits *edits,
00760 USetSpanCondition spanCondition,
00761 UErrorCode &errorCode) const;
00762
00763 UnicodeString &
00764 normalizeSecondAndAppend(UnicodeString &first,
00765 const UnicodeString &second,
00766 UBool doNormalize,
00767 UErrorCode &errorCode) const;
00768
00769 const Normalizer2 &norm2;
00770 const UnicodeSet &set;
00771 };
00772
00773 U_NAMESPACE_END
00774
00775 #endif // !UCONFIG_NO_NORMALIZATION
00776
00777 #endif
00778
00779 #endif // __NORMALIZER2_H__