00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef __NORMALIZER2_H__
00020 #define __NORMALIZER2_H__
00021
00027 #include "unicode/utypes.h"
00028
00029 #if !UCONFIG_NO_NORMALIZATION
00030
00031 #include "unicode/stringpiece.h"
00032 #include "unicode/uniset.h"
00033 #include "unicode/unistr.h"
00034 #include "unicode/unorm2.h"
00035
00036 U_NAMESPACE_BEGIN
00037
00038 class ByteSink;
00039
/**
 * Abstract interface for Unicode normalization of UnicodeString and UTF-8 text.
 *
 * The class derives from UObject and is exported with U_COMMON_API. Most
 * operations are pure virtual, so concrete behavior lives in subclasses;
 * callers obtain instances through the static factory functions below, all of
 * which return pointers to const objects.
 *
 * NOTE(review): ICU's public documentation describes the factory results as
 * shared, unmodifiable instances that the caller must not delete -- nothing in
 * this header enforces that, so confirm against the implementation.
 */
00083 class U_COMMON_API Normalizer2 : public UObject {
00084 public:
/** Destructor (declared here, defined out of line). */
00089 ~Normalizer2();
00090
/**
 * Factory for the instance named after the NFC normalization form.
 * @param errorCode ICU in/out error code.
 * @return pointer to a const Normalizer2.
 *         NOTE(review): presumably NULL when errorCode reports a failure -- confirm.
 */
00102 static const Normalizer2 *
00103 getNFCInstance(UErrorCode &errorCode);
00104
/**
 * Factory for the instance named after the NFD normalization form.
 * @param errorCode ICU in/out error code.
 * @return pointer to a const Normalizer2.
 */
00116 static const Normalizer2 *
00117 getNFDInstance(UErrorCode &errorCode);
00118
/**
 * Factory for the instance named after the NFKC normalization form.
 * @param errorCode ICU in/out error code.
 * @return pointer to a const Normalizer2.
 */
00130 static const Normalizer2 *
00131 getNFKCInstance(UErrorCode &errorCode);
00132
/**
 * Factory for the instance named after the NFKD normalization form.
 * @param errorCode ICU in/out error code.
 * @return pointer to a const Normalizer2.
 */
00144 static const Normalizer2 *
00145 getNFKDInstance(UErrorCode &errorCode);
00146
/**
 * Factory for the instance named after NFKC_Casefold.
 * @param errorCode ICU in/out error code.
 * @return pointer to a const Normalizer2.
 */
00158 static const Normalizer2 *
00159 getNFKCCasefoldInstance(UErrorCode &errorCode);
00160
/**
 * General factory selecting an instance by data name and mode.
 * @param packageName C string identifying the data package.
 *        NOTE(review): presumably NULL selects ICU's own data -- confirm.
 * @param name        C string naming the normalization data.
 * @param mode        UNormalization2Mode value choosing the operation mode.
 * @param errorCode   ICU in/out error code.
 * @return pointer to a const Normalizer2.
 */
00182 static const Normalizer2 *
00183 getInstance(const char *packageName,
00184 const char *name,
00185 UNormalization2Mode mode,
00186 UErrorCode &errorCode);
00187
/**
 * Convenience overload that returns the normalized text by value.
 *
 * Creates a fresh UnicodeString, passes it as the destination of the
 * three-argument normalize(), and returns it. The reference returned by the
 * three-argument overload is ignored; failures are visible only through
 * errorCode.
 * @param src       source string.
 * @param errorCode ICU in/out error code.
 * @return the string that was handed to the three-argument overload as dest.
 */
00198 UnicodeString
00199 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
00200 UnicodeString result;
00201 normalize(src, result, errorCode);
00202 return result;
00203 }
/**
 * Normalizes src into dest (pure virtual).
 * @param src       source string.
 * @param dest      destination string, passed by non-const reference.
 * @param errorCode ICU in/out error code.
 * @return a UnicodeString reference. NOTE(review): presumably dest -- confirm.
 */
00217 virtual UnicodeString &
00218 normalize(const UnicodeString &src,
00219 UnicodeString &dest,
00220 UErrorCode &errorCode) const = 0;
00221
/**
 * UTF-8 variant of normalize() writing to a ByteSink.
 *
 * Virtual but not pure, so a base-class definition is expected elsewhere.
 * @param options   option bit set (uint32_t); meaning is not visible in this header.
 * @param src       UTF-8 source, passed by value as a StringPiece.
 * @param sink      receiver for the output bytes.
 * @param edits     pointer to an Edits object.
 *                  NOTE(review): pointer type suggests it may be NULL -- confirm.
 * @param errorCode ICU in/out error code.
 */
00246 virtual void
00247 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
00248 Edits *edits, UErrorCode &errorCode) const;
00249
/**
 * Appends second to first, normalizing as indicated by the name (pure virtual).
 * @param first     string that is modified (non-const reference).
 * @param second    string to append (const reference).
 * @param errorCode ICU in/out error code.
 * @return a UnicodeString reference. NOTE(review): presumably first -- confirm.
 */
00264 virtual UnicodeString &
00265 normalizeSecondAndAppend(UnicodeString &first,
00266 const UnicodeString &second,
00267 UErrorCode &errorCode) const = 0;
/**
 * Appends second to first (pure virtual).
 * NOTE(review): how this differs from normalizeSecondAndAppend() (e.g. whether
 * second is assumed to be already normalized) is not visible here -- confirm.
 * @param first     string that is modified (non-const reference).
 * @param second    string to append (const reference).
 * @param errorCode ICU in/out error code.
 * @return a UnicodeString reference. NOTE(review): presumably first -- confirm.
 */
00282 virtual UnicodeString &
00283 append(UnicodeString &first,
00284 const UnicodeString &second,
00285 UErrorCode &errorCode) const = 0;
00286
/**
 * Retrieves the decomposition of a code point (pure virtual).
 * @param c             code point.
 * @param decomposition output string (non-const reference).
 * @return UBool. NOTE(review): presumably TRUE when c has a decomposition -- confirm.
 */
00300 virtual UBool
00301 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
00302
/**
 * Retrieves the raw decomposition of a code point.
 * Virtual but not pure, so a base-class definition is expected elsewhere.
 * @param c             code point.
 * @param decomposition output string (non-const reference).
 * @return UBool. NOTE(review): presumably TRUE when c has a decomposition -- confirm.
 */
00327 virtual UBool
00328 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
00329
/**
 * Composes a pair of code points.
 * Virtual but not pure, so a base-class definition is expected elsewhere.
 * @param a first code point.
 * @param b second code point.
 * @return a UChar32. NOTE(review): presumably a negative value signals
 *         "no composite" -- confirm.
 */
00345 virtual UChar32
00346 composePair(UChar32 a, UChar32 b) const;
00347
/**
 * Returns the combining class of a code point as a uint8_t.
 * Virtual but not pure, so a base-class definition is expected elsewhere.
 * @param c code point.
 */
00356 virtual uint8_t
00357 getCombiningClass(UChar32 c) const;
00358
/**
 * Tests whether a string is normalized (pure virtual).
 * @param s         input string.
 * @param errorCode ICU in/out error code.
 * @return UBool result of the test.
 */
00373 virtual UBool
00374 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
/**
 * UTF-8 variant of isNormalized().
 * Virtual but not pure, so a base-class definition is expected elsewhere.
 * @param s         UTF-8 input, passed by value as a StringPiece.
 * @param errorCode ICU in/out error code.
 * @return UBool result of the test.
 */
00396 virtual UBool
00397 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
00398
00399
/**
 * Quick normalization check (pure virtual).
 * @param s         input string.
 * @param errorCode ICU in/out error code.
 * @return a UNormalizationCheckResult value.
 */
00415 virtual UNormalizationCheckResult
00416 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00417
/**
 * Span-based quick check (pure virtual).
 * @param s         input string.
 * @param errorCode ICU in/out error code.
 * @return an int32_t. NOTE(review): presumably the end index of the leading
 *         span for which the quick check yields "yes" -- confirm.
 */
00440 virtual int32_t
00441 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00442
/** Boundary-before test for code point c (pure virtual). */
00456 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
00457
/** Boundary-after test for code point c (pure virtual). */
00472 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
00473
/** Inertness test for code point c (pure virtual). */
00487 virtual UBool isInert(UChar32 c) const = 0;
00488 };
00489
/**
 * Normalizer2 subclass that pairs another Normalizer2 with a UnicodeSet.
 *
 * The object stores only references to the wrapped normalizer and the set
 * (see the norm2 and set members), so it copies neither of them.
 * NOTE(review): presumably normalization is applied only to text covered by
 * the set and other text passes through unchanged -- confirm against the
 * implementation.
 */
00501 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
00502 public:
/**
 * Binds this object to an existing normalizer and filter set.
 *
 * Both arguments are kept by reference, not copied: n2 and filterSet must
 * remain valid for as long as this FilteredNormalizer2 is used.
 * @param n2        normalizer to wrap.
 * @param filterSet set used as the filter.
 */
00513 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
00514 norm2(n2), set(filterSet) {}
00515
/** Destructor (declared here, defined out of line). */
00520 ~FilteredNormalizer2();
00521
/**
 * Override of Normalizer2::normalize(src, dest, errorCode).
 * @param src       source string.
 * @param dest      destination string, passed by non-const reference.
 * @param errorCode ICU in/out error code.
 * @return a UnicodeString reference. NOTE(review): presumably dest -- confirm.
 */
00535 virtual UnicodeString &
00536 normalize(const UnicodeString &src,
00537 UnicodeString &dest,
00538 UErrorCode &errorCode) const U_OVERRIDE;
00539
/**
 * Override of Normalizer2::normalizeUTF8().
 * @param options   option bit set (uint32_t); meaning is not visible in this header.
 * @param src       UTF-8 source, passed by value as a StringPiece.
 * @param sink      receiver for the output bytes.
 * @param edits     pointer to an Edits object.
 *                  NOTE(review): pointer type suggests it may be NULL -- confirm.
 * @param errorCode ICU in/out error code.
 */
00564 virtual void
00565 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
00566 Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
00567
/**
 * Override of Normalizer2::normalizeSecondAndAppend().
 * @param first     string that is modified (non-const reference).
 * @param second    string to append (const reference).
 * @param errorCode ICU in/out error code.
 * @return a UnicodeString reference. NOTE(review): presumably first -- confirm.
 */
00582 virtual UnicodeString &
00583 normalizeSecondAndAppend(UnicodeString &first,
00584 const UnicodeString &second,
00585 UErrorCode &errorCode) const U_OVERRIDE;
/**
 * Override of Normalizer2::append().
 * @param first     string that is modified (non-const reference).
 * @param second    string to append (const reference).
 * @param errorCode ICU in/out error code.
 * @return a UnicodeString reference. NOTE(review): presumably first -- confirm.
 */
00600 virtual UnicodeString &
00601 append(UnicodeString &first,
00602 const UnicodeString &second,
00603 UErrorCode &errorCode) const U_OVERRIDE;
00604
/**
 * Override of Normalizer2::getDecomposition().
 * @param c             code point.
 * @param decomposition output string (non-const reference).
 */
00616 virtual UBool
00617 getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
00618
/**
 * Override of Normalizer2::getRawDecomposition().
 * @param c             code point.
 * @param decomposition output string (non-const reference).
 */
00630 virtual UBool
00631 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
00632
/**
 * Override of Normalizer2::composePair().
 * @param a first code point.
 * @param b second code point.
 */
00643 virtual UChar32
00644 composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
00645
/**
 * Override of Normalizer2::getCombiningClass().
 * @param c code point.
 */
00654 virtual uint8_t
00655 getCombiningClass(UChar32 c) const U_OVERRIDE;
00656
/**
 * Override of Normalizer2::isNormalized().
 * @param s         input string.
 * @param errorCode ICU in/out error code.
 */
00668 virtual UBool
00669 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
 * Override of Normalizer2::isNormalizedUTF8().
 * @param s         UTF-8 input, passed by value as a StringPiece.
 * @param errorCode ICU in/out error code.
 */
00691 virtual UBool
00692 isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
/**
 * Override of Normalizer2::quickCheck().
 * @param s         input string.
 * @param errorCode ICU in/out error code.
 */
00704 virtual UNormalizationCheckResult
00705 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
 * Override of Normalizer2::spanQuickCheckYes().
 * @param s         input string.
 * @param errorCode ICU in/out error code.
 */
00717 virtual int32_t
00718 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
00719
/** Override of Normalizer2::hasBoundaryBefore() for code point c. */
00728 virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
00729
/** Override of Normalizer2::hasBoundaryAfter() for code point c. */
00738 virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
00739
/** Override of Normalizer2::isInert() for code point c. */
00747 virtual UBool isInert(UChar32 c) const U_OVERRIDE;
00748 private:
// Private overload of normalize() taking an additional USetSpanCondition.
// NOTE(review): presumably the shared worker behind the public normalize(),
// with spanCondition selecting how the filter set is spanned -- confirm.
00749 UnicodeString &
00750 normalize(const UnicodeString &src,
00751 UnicodeString &dest,
00752 USetSpanCondition spanCondition,
00753 UErrorCode &errorCode) const;
00754
// Private overload of normalizeUTF8() taking a raw pointer + length instead of
// a StringPiece, plus a USetSpanCondition.
// NOTE(review): presumably the worker behind the public normalizeUTF8() -- confirm.
00755 void
00756 normalizeUTF8(uint32_t options, const char *src, int32_t length,
00757 ByteSink &sink, Edits *edits,
00758 USetSpanCondition spanCondition,
00759 UErrorCode &errorCode) const;
00760
// Private overload of normalizeSecondAndAppend() with a doNormalize flag.
// NOTE(review): presumably shared by the public normalizeSecondAndAppend()
// and append(), with doNormalize distinguishing the two -- confirm.
00761 UnicodeString &
00762 normalizeSecondAndAppend(UnicodeString &first,
00763 const UnicodeString &second,
00764 UBool doNormalize,
00765 UErrorCode &errorCode) const;
00766
// Wrapped normalizer; a reference bound in the constructor, not owned or copied.
00767 const Normalizer2 &norm2;
// Filter set; a reference bound in the constructor, not owned or copied.
00768 const UnicodeSet &set;
00769 };
00770
00771 U_NAMESPACE_END
00772
00773 #endif // !UCONFIG_NO_NORMALIZATION
00774 #endif // __NORMALIZER2_H__