00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef __NORMALIZER2_H__
00018 #define __NORMALIZER2_H__
00019
00025 #include "unicode/utypes.h"
00026
00027 #if !UCONFIG_NO_NORMALIZATION
00028
00029 #include "unicode/uniset.h"
00030 #include "unicode/unistr.h"
00031 #include "unicode/unorm2.h"
00032
00033 U_NAMESPACE_BEGIN
00034
00078 class U_COMMON_API Normalizer2 : public UObject {
00079 public:
00084 ~Normalizer2();
00085
00097 static const Normalizer2 *
00098 getNFCInstance(UErrorCode &errorCode);
00099
00111 static const Normalizer2 *
00112 getNFDInstance(UErrorCode &errorCode);
00113
00125 static const Normalizer2 *
00126 getNFKCInstance(UErrorCode &errorCode);
00127
00139 static const Normalizer2 *
00140 getNFKDInstance(UErrorCode &errorCode);
00141
00153 static const Normalizer2 *
00154 getNFKCCasefoldInstance(UErrorCode &errorCode);
00155
00177 static const Normalizer2 *
00178 getInstance(const char *packageName,
00179 const char *name,
00180 UNormalization2Mode mode,
00181 UErrorCode &errorCode);
00182
00193 UnicodeString
00194 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
00195 UnicodeString result;
00196 normalize(src, result, errorCode);
00197 return result;
00198 }
00212 virtual UnicodeString &
00213 normalize(const UnicodeString &src,
00214 UnicodeString &dest,
00215 UErrorCode &errorCode) const = 0;
00230 virtual UnicodeString &
00231 normalizeSecondAndAppend(UnicodeString &first,
00232 const UnicodeString &second,
00233 UErrorCode &errorCode) const = 0;
00248 virtual UnicodeString &
00249 append(UnicodeString &first,
00250 const UnicodeString &second,
00251 UErrorCode &errorCode) const = 0;
00252
00266 virtual UBool
00267 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
00268
00293 virtual UBool
00294 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
00295
00311 virtual UChar32
00312 composePair(UChar32 a, UChar32 b) const;
00313
00322 virtual uint8_t
00323 getCombiningClass(UChar32 c) const;
00324
00339 virtual UBool
00340 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00341
00357 virtual UNormalizationCheckResult
00358 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00359
00382 virtual int32_t
00383 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00384
00398 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
00399
00414 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
00415
00429 virtual UBool isInert(UChar32 c) const = 0;
00430 };
00431
00443 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
00444 public:
00455 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
00456 norm2(n2), set(filterSet) {}
00457
00462 ~FilteredNormalizer2();
00463
00477 virtual UnicodeString &
00478 normalize(const UnicodeString &src,
00479 UnicodeString &dest,
00480 UErrorCode &errorCode) const;
00495 virtual UnicodeString &
00496 normalizeSecondAndAppend(UnicodeString &first,
00497 const UnicodeString &second,
00498 UErrorCode &errorCode) const;
00513 virtual UnicodeString &
00514 append(UnicodeString &first,
00515 const UnicodeString &second,
00516 UErrorCode &errorCode) const;
00517
00529 virtual UBool
00530 getDecomposition(UChar32 c, UnicodeString &decomposition) const;
00531
00543 virtual UBool
00544 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
00545
00556 virtual UChar32
00557 composePair(UChar32 a, UChar32 b) const;
00558
00567 virtual uint8_t
00568 getCombiningClass(UChar32 c) const;
00569
00581 virtual UBool
00582 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
00594 virtual UNormalizationCheckResult
00595 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
00607 virtual int32_t
00608 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
00609
00618 virtual UBool hasBoundaryBefore(UChar32 c) const;
00619
00628 virtual UBool hasBoundaryAfter(UChar32 c) const;
00629
00637 virtual UBool isInert(UChar32 c) const;
00638 private:
00639 UnicodeString &
00640 normalize(const UnicodeString &src,
00641 UnicodeString &dest,
00642 USetSpanCondition spanCondition,
00643 UErrorCode &errorCode) const;
00644
00645 UnicodeString &
00646 normalizeSecondAndAppend(UnicodeString &first,
00647 const UnicodeString &second,
00648 UBool doNormalize,
00649 UErrorCode &errorCode) const;
00650
00651 const Normalizer2 &norm2;
00652 const UnicodeSet &set;
00653 };
00654
00655 U_NAMESPACE_END
00656
00657 #endif // !UCONFIG_NO_NORMALIZATION
00658 #endif // __NORMALIZER2_H__