ICU 75.1
uset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uset.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar07
16 * created by: Markus W. Scherer
17 *
18 * C version of UnicodeSet.
19 */
20 
21 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include "unicode/localpointer.h"
37 #endif // U_SHOW_CPLUSPLUS_API
38 
39 #ifndef USET_DEFINED
40 
41 #ifndef U_IN_DOXYGEN
42 #define USET_DEFINED
43 #endif
44 
50 typedef struct USet USet;
51 #endif
52 
64 enum {
70 
98 
111 
125 };
126 
182 typedef enum USetSpanCondition {
231 #ifndef U_HIDE_DEPRECATED_API
232 
237 #endif // U_HIDE_DEPRECATED_API
239 
240 enum {
248 };
249 
255 typedef struct USerializedSet {
260  const uint16_t *array;
265  int32_t bmpLength;
270  int32_t length;
275  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
276 } USerializedSet;
277 
278 /*********************************************************************
279  * USet API
280  *********************************************************************/
281 
289 U_CAPI USet* U_EXPORT2
290 uset_openEmpty(void);
291 
302 U_CAPI USet* U_EXPORT2
303 uset_open(UChar32 start, UChar32 end);
304 
314 U_CAPI USet* U_EXPORT2
315 uset_openPattern(const UChar* pattern, int32_t patternLength,
316  UErrorCode* ec);
317 
331 U_CAPI USet* U_EXPORT2
332 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
333  uint32_t options,
334  UErrorCode* ec);
335 
342 U_CAPI void U_EXPORT2
343 uset_close(USet* set);
344 
345 #if U_SHOW_CPLUSPLUS_API
346 
347 U_NAMESPACE_BEGIN
348 
359 
360 U_NAMESPACE_END
361 
362 #endif
363 
373 U_CAPI USet * U_EXPORT2
374 uset_clone(const USet *set);
375 
385 U_CAPI UBool U_EXPORT2
386 uset_isFrozen(const USet *set);
387 
402 U_CAPI void U_EXPORT2
403 uset_freeze(USet *set);
404 
415 U_CAPI USet * U_EXPORT2
416 uset_cloneAsThawed(const USet *set);
417 
427 U_CAPI void U_EXPORT2
428 uset_set(USet* set,
429  UChar32 start, UChar32 end);
430 
455 U_CAPI int32_t U_EXPORT2
456 uset_applyPattern(USet *set,
457  const UChar *pattern, int32_t patternLength,
458  uint32_t options,
459  UErrorCode *status);
460 
483 U_CAPI void U_EXPORT2
484 uset_applyIntPropertyValue(USet* set,
485  UProperty prop, int32_t value, UErrorCode* ec);
486 
522 U_CAPI void U_EXPORT2
523 uset_applyPropertyAlias(USet* set,
524  const UChar *prop, int32_t propLength,
525  const UChar *value, int32_t valueLength,
526  UErrorCode* ec);
527 
537 U_CAPI UBool U_EXPORT2
538 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
539  int32_t pos);
540 
556 U_CAPI int32_t U_EXPORT2
557 uset_toPattern(const USet* set,
558  UChar* result, int32_t resultCapacity,
559  UBool escapeUnprintable,
560  UErrorCode* ec);
561 
570 U_CAPI void U_EXPORT2
571 uset_add(USet* set, UChar32 c);
572 
585 U_CAPI void U_EXPORT2
586 uset_addAll(USet* set, const USet *additionalSet);
587 
597 U_CAPI void U_EXPORT2
598 uset_addRange(USet* set, UChar32 start, UChar32 end);
599 
609 U_CAPI void U_EXPORT2
610 uset_addString(USet* set, const UChar* str, int32_t strLen);
611 
621 U_CAPI void U_EXPORT2
622 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
623 
632 U_CAPI void U_EXPORT2
633 uset_remove(USet* set, UChar32 c);
634 
644 U_CAPI void U_EXPORT2
645 uset_removeRange(USet* set, UChar32 start, UChar32 end);
646 
656 U_CAPI void U_EXPORT2
657 uset_removeString(USet* set, const UChar* str, int32_t strLen);
658 
668 U_CAPI void U_EXPORT2
669 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
670 
682 U_CAPI void U_EXPORT2
683 uset_removeAll(USet* set, const USet* removeSet);
684 
697 U_CAPI void U_EXPORT2
698 uset_retain(USet* set, UChar32 start, UChar32 end);
699 
711 U_CAPI void U_EXPORT2
712 uset_retainString(USet *set, const UChar *str, int32_t length);
713 
723 U_CAPI void U_EXPORT2
724 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
725 
738 U_CAPI void U_EXPORT2
739 uset_retainAll(USet* set, const USet* retain);
740 
749 U_CAPI void U_EXPORT2
750 uset_compact(USet* set);
751 
765 U_CAPI void U_EXPORT2
766 uset_complement(USet* set);
767 
781 U_CAPI void U_EXPORT2
782 uset_complementRange(USet *set, UChar32 start, UChar32 end);
783 
794 U_CAPI void U_EXPORT2
795 uset_complementString(USet *set, const UChar *str, int32_t length);
796 
806 U_CAPI void U_EXPORT2
807 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
808 
820 U_CAPI void U_EXPORT2
821 uset_complementAll(USet* set, const USet* complement);
822 
830 U_CAPI void U_EXPORT2
831 uset_clear(USet* set);
832 
861 U_CAPI void U_EXPORT2
862 uset_closeOver(USet* set, int32_t attributes);
863 
870 U_CAPI void U_EXPORT2
871 uset_removeAllStrings(USet* set);
872 
880 U_CAPI UBool U_EXPORT2
881 uset_isEmpty(const USet* set);
882 
888 U_CAPI UBool U_EXPORT2
889 uset_hasStrings(const USet *set);
890 
899 U_CAPI UBool U_EXPORT2
900 uset_contains(const USet* set, UChar32 c);
901 
911 U_CAPI UBool U_EXPORT2
912 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
913 
922 U_CAPI UBool U_EXPORT2
923 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
924 
935 U_CAPI int32_t U_EXPORT2
936 uset_indexOf(const USet* set, UChar32 c);
937 
953 U_CAPI UChar32 U_EXPORT2
954 uset_charAt(const USet* set, int32_t charIndex);
955 
969 U_CAPI int32_t U_EXPORT2
970 uset_size(const USet* set);
971 
980 U_CAPI int32_t U_EXPORT2
981 uset_getRangeCount(const USet *set);
982 
991 U_CAPI int32_t U_EXPORT2
992 uset_getItemCount(const USet* set);
993 
1022 U_CAPI int32_t U_EXPORT2
1023 uset_getItem(const USet* set, int32_t itemIndex,
1024  UChar32* start, UChar32* end,
1025  UChar* str, int32_t strCapacity,
1026  UErrorCode* ec);
1027 
1036 U_CAPI UBool U_EXPORT2
1037 uset_containsAll(const USet* set1, const USet* set2);
1038 
1049 U_CAPI UBool U_EXPORT2
1050 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1051 
1060 U_CAPI UBool U_EXPORT2
1061 uset_containsNone(const USet* set1, const USet* set2);
1062 
1071 U_CAPI UBool U_EXPORT2
1072 uset_containsSome(const USet* set1, const USet* set2);
1073 
1093 U_CAPI int32_t U_EXPORT2
1094 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1095 
1114 U_CAPI int32_t U_EXPORT2
1115 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1116 
1136 U_CAPI int32_t U_EXPORT2
1137 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1138 
1157 U_CAPI int32_t U_EXPORT2
1158 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1159 
1168 U_CAPI UBool U_EXPORT2
1169 uset_equals(const USet* set1, const USet* set2);
1170 
1171 /*********************************************************************
1172  * Serialized set API
1173  *********************************************************************/
1174 
1224 U_CAPI int32_t U_EXPORT2
1225 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1226 
1235 U_CAPI UBool U_EXPORT2
1236 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1237 
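1245 U_CAPI void U_EXPORT2
1246 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1247 
1256 U_CAPI UBool U_EXPORT2
1257 uset_serializedContains(const USerializedSet* set, UChar32 c);
1258 
1268 U_CAPI int32_t U_EXPORT2
1269 uset_getSerializedRangeCount(const USerializedSet* set);
1270 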
1284 U_CAPI UBool U_EXPORT2
1285 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1286  UChar32* pStart, UChar32* pEnd);
1287 
1288 #endif
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
Enable case insensitive matching.
Definition: uset.h:124
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:275
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
Spans the longest substring that is a concatenation of set elements (characters or strings)...
Definition: uset.h:210
One more than the last span condition.
Definition: uset.h:236
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:260
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
"Smart pointer" class, closes a USet via uset_close().
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:69
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end...
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:182
C API: Unicode Properties.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
int32_t length
The total length of the array.
Definition: uset.h:270
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:196
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
Capacity of USerializedSet::staticArray.
Definition: uset.h:247
Enable case insensitive matching.
Definition: uset.h:97
Adds all case mappings for each element in the set.
Definition: uset.h:110
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:265
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
A serialized form of a Unicode set.
Definition: uset.h:255
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
Continues a span() while there is a set element at the current position.
Definition: uset.h:230
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
Continues a span() while there is no set element at the current position.
Definition: uset.h:195
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247