Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/unicode/uniset.h


$ cat -n /usr/include/unicode/uniset.h

     1	// © 2016 and later: Unicode, Inc. and others.
     2	// License & terms of use: http://www.unicode.org/copyright.html
     3	/*
     4	***************************************************************************
     5	* Copyright (C) 1999-2016, International Business Machines Corporation
     6	* and others. All Rights Reserved.
     7	***************************************************************************
     8	*   Date        Name        Description
     9	*   10/20/99    alan        Creation.
    10	***************************************************************************
    11	*/
    12	
    13	#ifndef UNICODESET_H
    14	#define UNICODESET_H
    15	
    16	#include "unicode/utypes.h"
    17	
    18	#if U_SHOW_CPLUSPLUS_API
    19	
    20	#include "unicode/ucpmap.h"
    21	#include "unicode/unifilt.h"
    22	#include "unicode/unistr.h"
    23	#include "unicode/uset.h"
    24	
    25	/**
    26	 * \file
    27	 * \brief C++ API: Unicode Set
    28	 */
    29	
    30	U_NAMESPACE_BEGIN
    31	
    32	// Forward Declarations.
    33	class BMPSet;
    34	class ParsePosition;
    35	class RBBIRuleScanner;
    36	class SymbolTable;
    37	class UnicodeSetStringSpan;
    38	class UVector;
    39	class RuleCharacterIterator;
    40	
    41	/**
    42	 * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
    43	 * represent <em>character classes</em> used in regular expressions.
    44	 * A character class specifies a subset of Unicode code points.  Legal
    45	 * code points are U+0000 to U+10FFFF, inclusive.
    46	 *
    47	 * <p>The UnicodeSet class is not designed to be subclassed.
    48	 *
    49	 * <p><code>UnicodeSet</code> supports two APIs. The first is the
    50	 * <em>operand</em> API that allows the caller to modify the value of
    51	 * a <code>UnicodeSet</code> object. It conforms to Java 2's
    52	 * <code>java.util.Set</code> interface, although
    53	 * <code>UnicodeSet</code> does not actually implement that
    54	 * interface. All methods of <code>Set</code> are supported, with the
    55	 * modification that they take a character range or single character
    56	 * instead of an <code>Object</code>, and they take a
    57	 * <code>UnicodeSet</code> instead of a <code>Collection</code>.  The
    58	 * operand API may be thought of in terms of boolean logic: a boolean
    59	 * OR is implemented by <code>add</code>, a boolean AND is implemented
    60	 * by <code>retain</code>, a boolean XOR is implemented by
    61	 * <code>complement</code> taking an argument, and a boolean NOT is
    62	 * implemented by <code>complement</code> with no argument.  In terms
    63	 * of traditional set theory function names, <code>add</code> is a
    64	 * union, <code>retain</code> is an intersection, <code>remove</code>
    65	 * is an asymmetric difference, and <code>complement</code> with no
    66	 * argument is a set complement with respect to the superset range
    67	 * <code>MIN_VALUE-MAX_VALUE</code>.
    68	 *
    69	 * <p>The second API is the
    70	 * <code>applyPattern()</code>/<code>toPattern()</code> API from the
    71	 * <code>java.text.Format</code>-derived classes.  Unlike the
    72	 * methods that add characters, add categories, and control the logic
    73	 * of the set, the method <code>applyPattern()</code> sets all
    74	 * attributes of a <code>UnicodeSet</code> at once, based on a
    75	 * string pattern.
    76	 *
    77	 * <p><b>Pattern syntax</b></p>
    78	 *
    79	 * Patterns are accepted by the constructors and the
    80	 * <code>applyPattern()</code> methods and returned by the
    81	 * <code>toPattern()</code> method.  These patterns follow a syntax
    82	 * similar to that employed by version 8 regular expression character
    83	 * classes.  Here are some simple examples:
    84	 *
    85	 * \htmlonly<blockquote>\endhtmlonly
    86	 *   <table>
    87	 *     <tr align="top">
    88	 *       <td nowrap valign="top" align="left"><code>[]</code></td>
    89	 *       <td valign="top">No characters</td>
    90	 *     </tr><tr align="top">
    91	 *       <td nowrap valign="top" align="left"><code>[a]</code></td>
    92	 *       <td valign="top">The character 'a'</td>
    93	 *     </tr><tr align="top">
    94	 *       <td nowrap valign="top" align="left"><code>[ae]</code></td>
    95	 *       <td valign="top">The characters 'a' and 'e'</td>
    96	 *     </tr>
    97	 *     <tr>
    98	 *       <td nowrap valign="top" align="left"><code>[a-e]</code></td>
    99	 *       <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
   100	 *       point order</td>
   101	 *     </tr>
   102	 *     <tr>
   103	 *       <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
   104	 *       <td valign="top">The character U+4E01</td>
   105	 *     </tr>
   106	 *     <tr>
   107	 *       <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
   108	 *       <td valign="top">The character 'a' and the multicharacter strings "ab" and
   109	 *       "ac"</td>
   110	 *     </tr>
   111	 *     <tr>
   112	 *       <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
   113	 *       <td valign="top">All characters in the general category Uppercase Letter</td>
   114	 *     </tr>
   115	 *   </table>
   116	 * \htmlonly</blockquote>\endhtmlonly
   117	 *
   118	 * Any character may be preceded by a backslash in order to remove any special
   119	 * meaning.  White space characters, as defined by UCharacter.isWhitespace(), are
   120	 * ignored, unless they are escaped.
   121	 *
   122	 * <p>Property patterns specify a set of characters having a certain
   123	 * property as defined by the Unicode standard.  Both the POSIX-like
   124	 * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized.  For a
   125	 * complete list of supported property patterns, see the User's Guide
   126	 * for UnicodeSet at
   127	 * <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset">
   128	 * https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>.
   129	 * Actual determination of property data is defined by the underlying
   130	 * Unicode database as implemented by UCharacter.
   131	 *
   132	 * <p>Patterns specify individual characters, ranges of characters, and
   133	 * Unicode property sets.  When elements are concatenated, they
   134	 * specify their union.  To complement a set, place a '^' immediately
   135	 * after the opening '['.  Property patterns are inverted by modifying
   136	 * their delimiters; "[:^foo:]" and "\\P{foo}".  In any other location,
   137	 * '^' has no special meaning.
   138	 *
   139	 * <p>Since ICU 70, "[^...]", "[:^foo:]", "\\P{foo}", and "[:binaryProperty=No:]"
   140	 * perform a “code point complement” (all code points minus the original set),
   141	 * removing all multicharacter strings,
   142	 * equivalent to <code>.complement().removeAllStrings()</code>.
   143	 * The complement() API function continues to perform a
   144	 * symmetric difference with all code points and thus retains all multicharacter strings.
   145	 *
   146	 * <p>Ranges are indicated by placing a '-' between two
   147	 * characters, as in "a-z".  This specifies the range of all
   148	 * characters from the left to the right, in Unicode order.  If the
   149	 * left character is greater than or equal to the
   150	 * right character it is a syntax error.  If a '-' occurs as the first
   151	 * character after the opening '[' or '[^', or if it occurs as the
   152	 * last character before the closing ']', then it is taken as a
   153	 * literal.  Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
   154	 * set of three characters, 'a', 'b', and '-'.
   155	 *
   156	 * <p>Sets may be intersected using the '&' operator or the asymmetric
   157	 * set difference may be taken using the '-' operator, for example,
   158	 * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
   159	 * with values less than 4096.  Operators ('&' and '-') have equal
   160	 * precedence and bind left-to-right.  Thus
   161	 * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
   162	 * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]".  This only really matters for
   163	 * difference; intersection is commutative.
   164	 *
   165	 * <table>
   166	 * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
   167	 * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
   168	 * through 'z' and all letters in between, in Unicode order
   169	 * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
   170	 * all characters but 'a' through 'z',
   171	 * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
   172	 * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
   173	 * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
   174	 * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
   175	 * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
   176	 * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
   177	 * <td>The asymmetric difference of sets specified by <em>pat1</em> and
   178	 * <em>pat2</em>
   179	 * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
   180	 * <td>The set of characters having the specified
   181	 * Unicode property; in
   182	 * this case, Unicode uppercase letters
   183	 * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
   184	 * <td>The set of characters <em>not</em> having the given
   185	 * Unicode property
   186	 * </table>
   187	 *
   188	 * <p><b>Formal syntax</b></p>
   189	 *
   190	 * \htmlonly<blockquote>\endhtmlonly
   191	 *   <table>
   192	 *     <tr align="top">
   193	 *       <td nowrap valign="top" align="right"><code>pattern :=  </code></td>
   194	 *       <td valign="top"><code>('[' '^'? item* ']') |
   195	 *       property</code></td>
   196	 *     </tr>
   197	 *     <tr align="top">
   198	 *       <td nowrap valign="top" align="right"><code>item :=  </code></td>
   199	 *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
   200	 *       </code></td>
   201	 *     </tr>
   202	 *     <tr align="top">
   203	 *       <td nowrap valign="top" align="right"><code>pattern-expr :=  </code></td>
   204	 *       <td valign="top"><code>pattern | pattern-expr pattern |
   205	 *       pattern-expr op pattern<br>
   206	 *       </code></td>
   207	 *     </tr>
   208	 *     <tr align="top">
   209	 *       <td nowrap valign="top" align="right"><code>op :=  </code></td>
   210	 *       <td valign="top"><code>'&' | '-'<br>
   211	 *       </code></td>
   212	 *     </tr>
   213	 *     <tr align="top">
   214	 *       <td nowrap valign="top" align="right"><code>special :=  </code></td>
   215	 *       <td valign="top"><code>'[' | ']' | '-'<br>
   216	 *       </code></td>
   217	 *     </tr>
   218	 *     <tr align="top">
   219	 *       <td nowrap valign="top" align="right"><code>char :=  </code></td>
   220	 *       <td valign="top"><em>any character that is not</em><code> special<br>
   221	 *       | ('\' </code><em>any character</em><code>)<br>
   222	 *       | ('\\u' hex hex hex hex)<br>
   223	 *       </code></td>
   224	 *     </tr>
   225	 *     <tr align="top">
   226	 *       <td nowrap valign="top" align="right"><code>hex :=  </code></td>
   227	 *       <td valign="top"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br>
   228	 *           'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td>
   229	 *     </tr>
   230	 *     <tr>
   231	 *       <td nowrap valign="top" align="right"><code>property :=  </code></td>
   232	 *       <td valign="top"><em>a Unicode property set pattern</em></td>
   233	 *     </tr>
   234	 *   </table>
   235	 *   <br>
   236	 *   <table border="1">
   237	 *     <tr>
   238	 *       <td>Legend: <table>
   239	 *         <tr>
   240	 *           <td nowrap valign="top"><code>a := b</code></td>
   241	 *           <td width="20" valign="top">  </td>
   242	 *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
   243	 *         </tr>
   244	 *         <tr>
   245	 *           <td nowrap valign="top"><code>a?</code></td>
   246	 *           <td valign="top"></td>
   247	 *           <td valign="top">zero or one instance of <code>a</code><br>
   248	 *           </td>
   249	 *         </tr>
   250	 *         <tr>
   251	 *           <td nowrap valign="top"><code>a*</code></td>
   252	 *           <td valign="top"></td>
   253	 *           <td valign="top">zero or more instances of <code>a</code><br>
   254	 *           </td>
   255	 *         </tr>
   256	 *         <tr>
   257	 *           <td nowrap valign="top"><code>a | b</code></td>
   258	 *           <td valign="top"></td>
   259	 *           <td valign="top">either <code>a</code> or <code>b</code><br>
   260	 *           </td>
   261	 *         </tr>
   262	 *         <tr>
   263	 *           <td nowrap valign="top"><code>'a'</code></td>
   264	 *           <td valign="top"></td>
   265	 *           <td valign="top">the literal string between the quotes </td>
   266	 *         </tr>
   267	 *       </table>
   268	 *       </td>
   269	 *     </tr>
   270	 *   </table>
   271	 * \htmlonly</blockquote>\endhtmlonly
   272	 * 
   273	 * <p>Note:
   274	 *  - Most UnicodeSet methods do not take a UErrorCode parameter because
   275	 *   there are usually very few opportunities for failure other than a shortage
   276	 *   of memory, error codes in low-level C++ string methods would be inconvenient,
   277	 *   and the error code as the last parameter (ICU convention) would prevent
   278	 *   the use of default parameter values.
   279	 *   Instead, such methods set the UnicodeSet into a "bogus" state
   280	 *   (see isBogus()) if an error occurs.
   281	 *
   282	 * @author Alan Liu
   283	 * @stable ICU 2.0
   284	 */
   285	class U_COMMON_API UnicodeSet final : public UnicodeFilter {
   286	private:
   287	    /**
   288	     * Enough for sets with few ranges.
   289	     * For example, White_Space has 10 ranges, list length 21.
   290	     */
   291	    static constexpr int32_t INITIAL_CAPACITY = 25;
   292	    // fFlags constant
   293	    static constexpr uint8_t kIsBogus = 1;  // This set is bogus (i.e. not valid)
   294	
   295	    UChar32* list = stackList; // MUST be terminated with HIGH
   296	    int32_t capacity = INITIAL_CAPACITY; // capacity of list
   297	    int32_t len = 1; // length of list used; 1 <= len <= capacity
   298	    uint8_t fFlags = 0;         // Bit flag (see constants above)
   299	
   300	    BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not nullptr.
   301	    UChar32* buffer = nullptr; // internal buffer, may be nullptr
   302	    int32_t bufferCapacity = 0; // capacity of buffer
   303	
   304	    /**
   305	     * The pattern representation of this set.  This may not be the
   306	     * most economical pattern.  It is the pattern supplied to
   307	     * applyPattern(), with variables substituted and whitespace
   308	     * removed.  For sets constructed without applyPattern(), or
   309	     * modified using the non-pattern API, this string will be empty,
   310	     * indicating that toPattern() must generate a pattern
   311	     * representation from the inversion list.
   312	     */
   313	    char16_t *pat = nullptr;
   314	    int32_t patLen = 0;
   315	
   316	    UVector* strings = nullptr; // maintained in sorted order
   317	    UnicodeSetStringSpan *stringSpan = nullptr;
   318	
   319	    /**
   320	     * Initial list array.
   321	     * Avoids some heap allocations, and list is never nullptr.
   322	     * Increases the object size a bit.
   323	     */
   324	    UChar32 stackList[INITIAL_CAPACITY];
   325	
   326	public:
   327	    /**
   328	     * Determine if this object contains a valid set.
   329	     * A bogus set has no value. It is different from an empty set.
   330	     * It can be used to indicate that no set value is available.
   331	     *
   332	     * @return true if the set is bogus/invalid, false otherwise
   333	     * @see setToBogus()
   334	     * @stable ICU 4.0
   335	     */
   336	    inline UBool isBogus(void) const;
   337	
   338	    /**
   339	     * Make this UnicodeSet object invalid.
   340	     * The set will test true with isBogus().
   341	     *
   342	     * A bogus set has no value. It is different from an empty set.
   343	     * It can be used to indicate that no set value is available.
   344	     *
   345	     * This utility function is used throughout the UnicodeSet
   346	     * implementation to indicate that a UnicodeSet operation failed,
   347	     * and may be used in other functions,
   348	     * especially but not exclusively when such functions do not
   349	     * take a UErrorCode for simplicity.
   350	     *
   351	     * @see isBogus()
   352	     * @stable ICU 4.0
   353	     */
   354	    void setToBogus();
   355	
   356	public:
   357	
   358	    enum {
   359	        /**
   360	         * Minimum value that can be stored in a UnicodeSet.
   361	         * @stable ICU 2.4
   362	         */
   363	        MIN_VALUE = 0,
   364	
   365	        /**
   366	         * Maximum value that can be stored in a UnicodeSet.
   367	         * @stable ICU 2.4
   368	         */
   369	        MAX_VALUE = 0x10ffff
   370	    };
   371	
   372	    //----------------------------------------------------------------
   373	    // Constructors &c
   374	    //----------------------------------------------------------------
   375	
   376	public:
   377	
   378	    /**
   379	     * Constructs an empty set.
   380	     * @stable ICU 2.0
   381	     */
   382	    UnicodeSet();
   383	
   384	    /**
   385	     * Constructs a set containing the given range. If <code>end <
   386	     * start</code> then an empty set is created.
   387	     *
   388	     * @param start first character, inclusive, of range
   389	     * @param end last character, inclusive, of range
   390	     * @stable ICU 2.4
   391	     */
   392	    UnicodeSet(UChar32 start, UChar32 end);
   393	
   394	#ifndef U_HIDE_INTERNAL_API
   395	    /**
   396	     * @internal
   397	     */
   398	    enum ESerialization {
   399	      kSerialized  /* result of serialize() */
   400	    };
   401	
   402	    /**
   403	     * Constructs a set from the output of serialize().
   404	     *
   405	     * @param buffer the 16 bit array
   406	     * @param bufferLen the original length returned from serialize()
   407	     * @param serialization the value 'kSerialized'
   408	     * @param status error code
   409	     *
   410	     * @internal
   411	     */
   412	    UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
   413	               ESerialization serialization, UErrorCode &status);
   414	#endif  /* U_HIDE_INTERNAL_API */
   415	
   416	    /**
   417	     * Constructs a set from the given pattern.  See the class
   418	     * description for the syntax of the pattern language.
   419	     * @param pattern a string specifying what characters are in the set
   420	     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
   421	     * contains a syntax error.
   422	     * @stable ICU 2.0
   423	     */
   424	    UnicodeSet(const UnicodeString& pattern,
   425	               UErrorCode& status);
   426	
   427	#ifndef U_HIDE_INTERNAL_API
   428	    /**
   429	     * Constructs a set from the given pattern.  See the class
   430	     * description for the syntax of the pattern language.
   431	     * @param pattern a string specifying what characters are in the set
   432	     * @param options bitmask for options to apply to the pattern.
   433	     * Valid options are USET_IGNORE_SPACE and
   434	     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
   435	     * These case options are mutually exclusive.
   436	     * @param symbols a symbol table mapping variable names to values
   437	     * and stand-in characters to UnicodeSets; may be nullptr
   438	     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
   439	     * contains a syntax error.
   440	     * @internal
   441	     */
   442	    UnicodeSet(const UnicodeString& pattern,
   443	               uint32_t options,
   444	               const SymbolTable* symbols,
   445	               UErrorCode& status);
   446	#endif  /* U_HIDE_INTERNAL_API */
   447	
   448	    /**
   449	     * Constructs a set from the given pattern.  See the class description
   450	     * for the syntax of the pattern language.
   451	     * @param pattern a string specifying what characters are in the set
   452	     * @param pos on input, the position in pattern at which to start parsing.
   453	     * On output, the position after the last character parsed.
   454	     * @param options bitmask for options to apply to the pattern.
   455	     * Valid options are USET_IGNORE_SPACE and
   456	     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
   457	     * These case options are mutually exclusive.
   458	     * @param symbols a symbol table mapping variable names to values
   459	     * and stand-in characters to UnicodeSets; may be nullptr
   460	     * @param status input-output error code
   461	     * @stable ICU 2.8
   462	     */
   463	    UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
   464	               uint32_t options,
   465	               const SymbolTable* symbols,
   466	               UErrorCode& status);
   467	
   468	    /**
   469	     * Constructs a set that is identical to the given UnicodeSet.
   470	     * @stable ICU 2.0
   471	     */
   472	    UnicodeSet(const UnicodeSet& o);
   473	
   474	    /**
   475	     * Destructs the set.
   476	     * @stable ICU 2.0
   477	     */
   478	    virtual ~UnicodeSet();
   479	
   480	    /**
   481	     * Assigns this object to be a copy of another.
   482	     * A frozen set will not be modified.
   483	     * @stable ICU 2.0
   484	     */
   485	    UnicodeSet& operator=(const UnicodeSet& o);
   486	
   487	    /**
   488	     * Compares the specified object with this set for equality.  Returns
   489	     * <tt>true</tt> if the two sets
   490	     * have the same size, and every member of the specified set is
   491	     * contained in this set (or equivalently, every member of this set is
   492	     * contained in the specified set).
   493	     *
   494	     * @param o set to be compared for equality with this set.
   495	     * @return <tt>true</tt> if the specified set is equal to this set.
   496	     * @stable ICU 2.0
   497	     */
   498	    virtual bool operator==(const UnicodeSet& o) const;
   499	
   500	    /**
   501	     * Compares the specified object with this set for inequality.  Returns
   502	     * <tt>true</tt> if the specified set is not equal to this set.
   503	     * @stable ICU 2.0
   504	     */
   505	    inline bool operator!=(const UnicodeSet& o) const;
   506	
   507	    /**
   508	     * Returns a copy of this object.  All UnicodeFunctor objects have
   509	     * to support cloning in order to allow classes using
   510	     * UnicodeFunctors, such as Transliterator, to implement cloning.
   511	     * If this set is frozen, then the clone will be frozen as well.
   512	     * Use cloneAsThawed() for a mutable clone of a frozen set.
   513	     * @see cloneAsThawed
   514	     * @stable ICU 2.0
   515	     */
   516	    virtual UnicodeSet* clone() const override;
   517	
   518	    /**
   519	     * Returns the hash code value for this set.
   520	     *
   521	     * @return the hash code value for this set.
   522	     * @see Object#hashCode()
   523	     * @stable ICU 2.0
   524	     */
   525	    virtual int32_t hashCode(void) const;
   526	
   527	    /**
   528	     * Get a UnicodeSet pointer from a USet
   529	     *
   530	     * @param uset a USet (the ICU plain C type for UnicodeSet)
   531	     * @return the corresponding UnicodeSet pointer.
   532	     *
   533	     * @stable ICU 4.2
   534	     */
   535	    inline static UnicodeSet *fromUSet(USet *uset);
   536	
   537	    /**
   538	     * Get a UnicodeSet pointer from a const USet
   539	     *
   540	     * @param uset a const USet (the ICU plain C type for UnicodeSet)
   541	     * @return the corresponding UnicodeSet pointer.
   542	     *
   543	     * @stable ICU 4.2
   544	     */
   545	    inline static const UnicodeSet *fromUSet(const USet *uset);
   546	    
   547	    /**
   548	     * Produce a USet * pointer for this UnicodeSet.
   549	     * USet is the plain C type for UnicodeSet
   550	     *
   551	     * @return a USet pointer for this UnicodeSet
   552	     * @stable ICU 4.2
   553	     */
   554	    inline USet *toUSet();
   555	
   556	
   557	    /**
   558	     * Produce a const USet * pointer for this UnicodeSet.
   559	     * USet is the plain C type for UnicodeSet
   560	     *
   561	     * @return a const USet pointer for this UnicodeSet
   562	     * @stable ICU 4.2
   563	     */
   564	    inline const USet * toUSet() const;
   565	
   566	
   567	    //----------------------------------------------------------------
   568	    // Freezable API
   569	    //----------------------------------------------------------------
   570	
   571	    /**
   572	     * Determines whether the set has been frozen (made immutable) or not.
   573	     * See the ICU4J Freezable interface for details.
   574	     * @return true/false for whether the set has been frozen
   575	     * @see freeze
   576	     * @see cloneAsThawed
   577	     * @stable ICU 3.8
   578	     */
   579	    inline UBool isFrozen() const;
   580	
   581	    /**
   582	     * Freeze the set (make it immutable).
   583	     * Once frozen, it cannot be unfrozen and is therefore thread-safe
   584	     * until it is deleted.
   585	     * See the ICU4J Freezable interface for details.
   586	     * Freezing the set may also make some operations faster, for example
   587	     * contains() and span().
   588	     * A frozen set will not be modified. (It remains frozen.)
   589	     * @return this set.
   590	     * @see isFrozen
   591	     * @see cloneAsThawed
   592	     * @stable ICU 3.8
   593	     */
   594	    UnicodeSet *freeze();
   595	
   596	    /**
   597	     * Clone the set and make the clone mutable.
   598	     * See the ICU4J Freezable interface for details.
   599	     * @return the mutable clone
   600	     * @see freeze
   601	     * @see isFrozen
   602	     * @stable ICU 3.8
   603	     */
   604	    UnicodeSet *cloneAsThawed() const;
   605	
   606	    //----------------------------------------------------------------
   607	    // Public API
   608	    //----------------------------------------------------------------
   609	
   610	    /**
   611	     * Make this object represent the range `start - end`.
   612	     * If `start > end` then this object is set to an empty range.
   613	     * A frozen set will not be modified.
   614	     *
   615	     * @param start first character in the set, inclusive
   616	     * @param end last character in the set, inclusive
   617	     * @stable ICU 2.4
   618	     */
   619	    UnicodeSet& set(UChar32 start, UChar32 end);
   620	
   621	    /**
   622	     * Return true if the given position, in the given pattern, appears
   623	     * to be the start of a UnicodeSet pattern.
   624	     * @stable ICU 2.4
   625	     */
   626	    static UBool resemblesPattern(const UnicodeString& pattern,
   627	                                  int32_t pos);
   628	
   629	    /**
   630	     * Modifies this set to represent the set specified by the given
   631	     * pattern, ignoring Unicode Pattern_White_Space characters.
   632	     * See the class description for the syntax of the pattern language.
   633	     * A frozen set will not be modified.
   634	     * @param pattern a string specifying what characters are in the set
   635	     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
   636	     * contains a syntax error.
   637	     * <em> Empties the set passed before applying the pattern.</em>
   638	     * @return a reference to this
   639	     * @stable ICU 2.0
   640	     */
   641	    UnicodeSet& applyPattern(const UnicodeString& pattern,
   642	                             UErrorCode& status);
   643	
   644	#ifndef U_HIDE_INTERNAL_API
   645	    /**
   646	     * Modifies this set to represent the set specified by the given
   647	     * pattern, optionally ignoring Unicode Pattern_White_Space characters.
   648	     * See the class description for the syntax of the pattern language.
   649	     * A frozen set will not be modified.
   650	     * @param pattern a string specifying what characters are in the set
   651	     * @param options bitmask for options to apply to the pattern.
   652	     * Valid options are USET_IGNORE_SPACE and
   653	     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
   654	     * These case options are mutually exclusive.
   655	     * @param symbols a symbol table mapping variable names to
   656	     * values and stand-ins to UnicodeSets; may be nullptr
   657	     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
   658	     * contains a syntax error.
   659	     *<em> Empties the set passed before applying the pattern.</em>
   660	     * @return a reference to this
   661	     * @internal
   662	     */
   663	    UnicodeSet& applyPattern(const UnicodeString& pattern,
   664	                             uint32_t options,
   665	                             const SymbolTable* symbols,
   666	                             UErrorCode& status);
   667	#endif  /* U_HIDE_INTERNAL_API */
   668	
   669	    /**
   670	     * Parses the given pattern, starting at the given position.  The
   671	     * character at pattern.charAt(pos.getIndex()) must be '[', or the
   672	     * parse fails.  Parsing continues until the corresponding closing
   673	     * ']'.  If a syntax error is encountered between the opening and
   674	     * closing bracket, the parse fails.  Upon return from a successful
   675	     * parse, the ParsePosition is updated to point to the character
   676	     * following the closing ']', and a reference to this set, now
   677	     * representing the parsed pattern, is returned.  This method calls
   678	     * itself recursively to parse embedded subpatterns.
   679	     *<em> Empties the set passed before applying the pattern.</em>
   680	     * A frozen set will not be modified.
   681	     *
   682	     * @param pattern the string containing the pattern to be parsed.
   683	     * The portion of the string from pos.getIndex(), which must be a
   684	     * '[', to the corresponding closing ']', is parsed.
   685	     * @param pos upon entry, the position at which to begin parsing.
   686	     * The character at pattern.charAt(pos.getIndex()) must be a '['.
   687	     * Upon return from a successful parse, pos.getIndex() is either
   688	     * the character after the closing ']' of the parsed pattern, or
   689	     * pattern.length() if the closing ']' is the last character of
   690	     * the pattern string.
   691	     * @param options bitmask for options to apply to the pattern.
   692	     * Valid options are USET_IGNORE_SPACE and
   693	     * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
   694	     * These case options are mutually exclusive.
   695	     * @param symbols a symbol table mapping variable names to
   696	     * values and stand-ins to UnicodeSets; may be nullptr
   697	     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
   698	     * contains a syntax error.
   699	     * @return a reference to this
   700	     * @stable ICU 2.8
   701	     */
   702	    UnicodeSet& applyPattern(const UnicodeString& pattern,
   703	                             ParsePosition& pos,
   704	                             uint32_t options,
   705	                             const SymbolTable* symbols,
   706	                             UErrorCode& status);
   707	
   708	    /**
   709	     * Returns a string representation of this set.  If the result of
   710	     * calling this function is passed to a UnicodeSet constructor, it
   711	     * will produce another set that is equal to this one.
   712	     * A frozen set will not be modified.
   713	     * @param result the string to receive the rules.  Previous
   714	     * contents will be deleted.
   715	     * @param escapeUnprintable if true then convert unprintable
   716	     * characters to their hex escape representations, \\uxxxx or
   717	     * \\Uxxxxxxxx.  Unprintable characters are those other than
   718	     * U+000A, U+0020..U+007E.
   719	     * @stable ICU 2.0
   720	     */
   721	    virtual UnicodeString& toPattern(UnicodeString& result,
   722	                                     UBool escapeUnprintable = false) const override;
   723	
   724	    /**
   725	     * Modifies this set to contain those code points which have the given value
   726	     * for the given binary or enumerated property, as returned by
   727	     * u_getIntPropertyValue.  Prior contents of this set are lost.
   728	     * A frozen set will not be modified.
   729	     *
   730	     * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
   731	     * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
   732	     * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
   733	     *
   734	     * @param value a value in the range u_getIntPropertyMinValue(prop)..
   735	     * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
   736	     * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
   737	     * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
   738	     * categories such as [:L:] to be represented.
   739	     *
   740	     * @param ec error code input/output parameter
   741	     *
   742	     * @return a reference to this set
   743	     *
   744	     * @stable ICU 2.4
   745	     */
   746	    UnicodeSet& applyIntPropertyValue(UProperty prop,
   747	                                      int32_t value,
   748	                                      UErrorCode& ec);
   749	
   750	    /**
   751	     * Modifies this set to contain those code points which have the
   752	     * given value for the given property.  Prior contents of this
   753	     * set are lost.
   754	     * A frozen set will not be modified.
   755	     *
   756	     * @param prop a property alias, either short or long.  The name is matched
   757	     * loosely.  See PropertyAliases.txt for names and a description of loose
   758	     * matching.  If the value string is empty, then this string is interpreted
   759	     * as either a General_Category value alias, a Script value alias, a binary
   760	     * property alias, or a special ID.  Special IDs are matched loosely and
   761	     * correspond to the following sets:
   762	     *
   763	     * "ANY" = [\\u0000-\\U0010FFFF],
   764	     * "ASCII" = [\\u0000-\\u007F],
   765	     * "Assigned" = [:^Cn:].
   766	     *
   767	     * @param value a value alias, either short or long.  The name is matched
   768	     * loosely.  See PropertyValueAliases.txt for names and a description of
   769	     * loose matching.  In addition to aliases listed, numeric values and
   770	     * canonical combining classes may be expressed numerically, e.g., ("nv",
   771	     * "0.5") or ("ccc", "220").  The value string may also be empty.
   772	     *
   773	     * @param ec error code input/output parameter
   774	     *
   775	     * @return a reference to this set
   776	     *
   777	     * @stable ICU 2.4
   778	     */
   779	    UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
   780	                                   const UnicodeString& value,
   781	                                   UErrorCode& ec);
   782	
   783	    /**
   784	     * Returns the number of elements in this set (its cardinality).
   785	     * Note that the elements of a set may include both individual
   786	     * codepoints and strings.
   787	     *
   788	     * This is slower than getRangeCount() because
   789	     * it counts the code points of all ranges.
   790	     *
   791	     * @return the number of elements in this set (its cardinality).
   792	     * @stable ICU 2.0
   793	     * @see getRangeCount
   794	     */
   795	    virtual int32_t size(void) const;
   796	
   797	    /**
   798	     * Returns <tt>true</tt> if this set contains no elements.
   799	     *
   800	     * @return <tt>true</tt> if this set contains no elements.
   801	     * @stable ICU 2.0
   802	     */
   803	    virtual UBool isEmpty(void) const;
   804	
   805	    /**
   806	     * @return true if this set contains multi-character strings or the empty string.
   807	     * @stable ICU 70
   808	     */
   809	    UBool hasStrings() const;
   810	
   811	    /**
   812	     * Returns true if this set contains the given character.
   813	     * This function works faster with a frozen set.
   814	     * @param c character to be checked for containment
   815	     * @return true if the test condition is met
   816	     * @stable ICU 2.0
   817	     */
   818	    virtual UBool contains(UChar32 c) const override;
   819	
   820	    /**
   821	     * Returns true if this set contains every character
   822	     * of the given range.
   823	     * @param start first character, inclusive, of the range
   824	     * @param end last character, inclusive, of the range
   825	     * @return true if the test condition is met
   826	     * @stable ICU 2.0
   827	     */
   828	    virtual UBool contains(UChar32 start, UChar32 end) const;
   829	
   830	    /**
   831	     * Returns <tt>true</tt> if this set contains the given
   832	     * multicharacter string.
   833	     * @param s string to be checked for containment
   834	     * @return <tt>true</tt> if this set contains the specified string
   835	     * @stable ICU 2.4
   836	     */
   837	    UBool contains(const UnicodeString& s) const;
   838	
   839	    /**
   840	     * Returns true if this set contains all the characters and strings
   841	     * of the given set.
   842	     * @param c set to be checked for containment
   843	     * @return true if the test condition is met
   844	     * @stable ICU 2.4
   845	     */
   846	    virtual UBool containsAll(const UnicodeSet& c) const;
   847	
   848	    /**
   849	     * Returns true if this set contains all the characters
   850	     * of the given string.
   851	     * @param s string containing characters to be checked for containment
   852	     * @return true if the test condition is met
   853	     * @stable ICU 2.4
   854	     */
   855	    UBool containsAll(const UnicodeString& s) const;
   856	
   857	    /**
   858	     * Returns true if this set contains none of the characters
   859	     * of the given range.
   860	     * @param start first character, inclusive, of the range
   861	     * @param end last character, inclusive, of the range
   862	     * @return true if the test condition is met
   863	     * @stable ICU 2.4
   864	     */
   865	    UBool containsNone(UChar32 start, UChar32 end) const;
   866	
   867	    /**
   868	     * Returns true if this set contains none of the characters and strings
   869	     * of the given set.
   870	     * @param c set to be checked for containment
   871	     * @return true if the test condition is met
   872	     * @stable ICU 2.4
   873	     */
   874	    UBool containsNone(const UnicodeSet& c) const;
   875	
   876	    /**
   877	     * Returns true if this set contains none of the characters
   878	     * of the given string.
   879	     * @param s string containing characters to be checked for containment
   880	     * @return true if the test condition is met
   881	     * @stable ICU 2.4
   882	     */
   883	    UBool containsNone(const UnicodeString& s) const;
   884	
   885	    /**
   886	     * Returns true if this set contains one or more of the characters
   887	     * in the given range.
   888	     * @param start first character, inclusive, of the range
   889	     * @param end last character, inclusive, of the range
   890	     * @return true if the condition is met
   891	     * @stable ICU 2.4
   892	     */
   893	    inline UBool containsSome(UChar32 start, UChar32 end) const;
   894	
   895	    /**
   896	     * Returns true if this set contains one or more of the characters
   897	     * and strings of the given set.
   898	     * @param s The set to be checked for containment
   899	     * @return true if the condition is met
   900	     * @stable ICU 2.4
   901	     */
   902	    inline UBool containsSome(const UnicodeSet& s) const;
   903	
   904	    /**
   905	     * Returns true if this set contains one or more of the characters
   906	     * of the given string.
   907	     * @param s string containing characters to be checked for containment
   908	     * @return true if the condition is met
   909	     * @stable ICU 2.4
   910	     */
   911	    inline UBool containsSome(const UnicodeString& s) const;
   912	
   913	    /**
   914	     * Returns the length of the initial substring of the input string which
   915	     * consists only of characters and strings that are contained in this set
   916	     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
   917	     * or only of characters and strings that are not contained
   918	     * in this set (USET_SPAN_NOT_CONTAINED).
   919	     * See USetSpanCondition for details.
   920	     * Similar to the strspn() C library function.
   921	     * Unpaired surrogates are treated according to contains() of their surrogate code points.
   922	     * This function works faster with a frozen set and with a non-negative string length argument.
   923	     * @param s start of the string
   924	     * @param length of the string; can be -1 for NUL-terminated
   925	     * @param spanCondition specifies the containment condition
   926	     * @return the length of the initial substring according to the spanCondition;
   927	     *         0 if the start of the string does not fit the spanCondition
   928	     * @stable ICU 3.8
   929	     * @see USetSpanCondition
   930	     */
   931	    int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
   932	
   933	    /**
   934	     * Returns the end of the substring of the input string according to the USetSpanCondition.
   935	     * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
   936	     * after pinning start to 0<=start<=s.length().
   937	     * @param s the string
   938	     * @param start the start index in the string for the span operation
   939	     * @param spanCondition specifies the containment condition
   940	     * @return the exclusive end of the substring according to the spanCondition;
   941	     *         the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
   942	     * @stable ICU 4.4
   943	     * @see USetSpanCondition
   944	     */
   945	    inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
   946	
   947	    /**
   948	     * Returns the start of the trailing substring of the input string which
   949	     * consists only of characters and strings that are contained in this set
   950	     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
   951	     * or only of characters and strings that are not contained
   952	     * in this set (USET_SPAN_NOT_CONTAINED).
   953	     * See USetSpanCondition for details.
   954	     * Unpaired surrogates are treated according to contains() of their surrogate code points.
   955	     * This function works faster with a frozen set and with a non-negative string length argument.
   956	     * @param s start of the string
   957	     * @param length of the string; can be -1 for NUL-terminated
   958	     * @param spanCondition specifies the containment condition
   959	     * @return the start of the trailing substring according to the spanCondition;
   960	     *         the string length if the end of the string does not fit the spanCondition
   961	     * @stable ICU 3.8
   962	     * @see USetSpanCondition
   963	     */
   964	    int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
   965	
   966	    /**
   967	     * Returns the start of the substring of the input string according to the USetSpanCondition.
   968	     * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
   969	     * after pinning limit to 0<=limit<=s.length().
   970	     * @param s the string
   971	     * @param limit the exclusive-end index in the string for the span operation
   972	     *              (use s.length() or INT32_MAX for spanning back from the end of the string)
   973	     * @param spanCondition specifies the containment condition
   974	     * @return the start of the substring according to the spanCondition;
   975	     *         the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
   976	     * @stable ICU 4.4
   977	     * @see USetSpanCondition
   978	     */
   979	    inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
   980	
   981	    /**
   982	     * Returns the length of the initial substring of the input string which
   983	     * consists only of characters and strings that are contained in this set
   984	     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
   985	     * or only of characters and strings that are not contained
   986	     * in this set (USET_SPAN_NOT_CONTAINED).
   987	     * See USetSpanCondition for details.
   988	     * Similar to the strspn() C library function.
   989	     * Malformed byte sequences are treated according to contains(0xfffd).
   990	     * This function works faster with a frozen set and with a non-negative string length argument.
   991	     * @param s start of the string (UTF-8)
   992	     * @param length of the string; can be -1 for NUL-terminated
   993	     * @param spanCondition specifies the containment condition
   994	     * @return the length of the initial substring according to the spanCondition;
   995	     *         0 if the start of the string does not fit the spanCondition
   996	     * @stable ICU 3.8
   997	     * @see USetSpanCondition
   998	     */
   999	    int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
  1000	
  1001	    /**
  1002	     * Returns the start of the trailing substring of the input string which
  1003	     * consists only of characters and strings that are contained in this set
  1004	     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
  1005	     * or only of characters and strings that are not contained
  1006	     * in this set (USET_SPAN_NOT_CONTAINED).
  1007	     * See USetSpanCondition for details.
  1008	     * Malformed byte sequences are treated according to contains(0xfffd).
  1009	     * This function works faster with a frozen set and with a non-negative string length argument.
  1010	     * @param s start of the string (UTF-8)
  1011	     * @param length of the string; can be -1 for NUL-terminated
  1012	     * @param spanCondition specifies the containment condition
  1013	     * @return the start of the trailing substring according to the spanCondition;
  1014	     *         the string length if the end of the string does not fit the spanCondition
  1015	     * @stable ICU 3.8
  1016	     * @see USetSpanCondition
  1017	     */
  1018	    int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
  1019	
  1020	    /**
  1021	     * Implement UnicodeMatcher::matches()
  1022	     * @stable ICU 2.4
  1023	     */
  1024	    virtual UMatchDegree matches(const Replaceable& text,
  1025	                         int32_t& offset,
  1026	                         int32_t limit,
  1027	                         UBool incremental) override;
  1028	
  1029	private:
  1030	    /**
  1031	     * Returns the longest match for s in text at the given position.
  1032	     * If limit > start then match forward from start+1 to limit
  1033	     * matching all characters except s.charAt(0).  If limit < start,
  1034	     * go backward starting from start-1 matching all characters
  1035	     * except s.charAt(s.length()-1).  This method assumes that the
  1036	     * first character, text.charAt(start), matches s, so it does not
  1037	     * check it.
  1038	     * @param text the text to match
  1039	     * @param start the first character to match.  In the forward
  1040	     * direction, text.charAt(start) is matched against s.charAt(0).
  1041	     * In the reverse direction, it is matched against
  1042	     * s.charAt(s.length()-1).
  1043	     * @param limit the limit offset for matching, either last+1 in
  1044	     * the forward direction, or last-1 in the reverse direction,
  1045	     * where last is the index of the last character to match.
  1046	     * @param s the string to match against text
  1047	     * @return If part of s matches up to the limit, return |limit -
  1048	     * start|.  If all of s matches before reaching the limit, return
  1049	     * s.length().  If there is a mismatch between s and text, return
  1050	     * 0
  1051	     */
  1052	    static int32_t matchRest(const Replaceable& text,
  1053	                             int32_t start, int32_t limit,
  1054	                             const UnicodeString& s);
  1055	
  1056	    /**
  1057	     * Returns the smallest value i such that c < list[i].  Caller
  1058	     * must ensure that c is a legal value or this method will enter
  1059	     * an infinite loop.  This method performs a binary search.
  1060	     * @param c a character in the range MIN_VALUE..MAX_VALUE
  1061	     * inclusive
  1062	     * @return the smallest integer i in the range 0..len-1,
  1063	     * inclusive, such that c < list[i]
  1064	     */
  1065	    int32_t findCodePoint(UChar32 c) const;
  1066	
  1067	public:
  1068	
  1069	    /**
  1070	     * Implementation of UnicodeMatcher API.  Union the set of all
  1071	     * characters that may be matched by this object into the given
  1072	     * set.
  1073	     * @param toUnionTo the set into which to union the source characters
  1074	     * @stable ICU 2.4
  1075	     */
  1076	    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const override;
  1077	
  1078	    /**
  1079	     * Returns the index of the given character within this set, where
  1080	     * the set is ordered by ascending code point.  If the character
  1081	     * is not in this set, return -1.  The inverse is <code>charAt()</code>.
  1082	     * @param c the character (code point) whose index is requested
  1083	     * @return an index from 0..size()-1, or -1
  1084	     * @stable ICU 2.4
  1085	     */
  1086	    int32_t indexOf(UChar32 c) const;
  1087	
  1088	    /**
  1089	     * Returns the character at the given index within this set, where
  1090	     * the set is ordered by ascending code point.  If the index is
  1091	     * out of range for characters, returns (UChar32)-1.
  1092	     * The inverse of this method is <code>indexOf()</code>.
  1093	     *
  1094	     * For iteration, this is slower than UnicodeSetIterator or
  1095	     * getRangeCount()/getRangeStart()/getRangeEnd(),
  1096	     * because for each call it skips linearly over <code>index</code>
  1097	     * characters in the ranges.
  1098	     *
  1099	     * @param index an index from 0..size()-1
  1100	     * @return the character at the given index, or (UChar32)-1.
  1101	     * @stable ICU 2.4
  1102	     */
  1103	    UChar32 charAt(int32_t index) const;
  1104	
  1105	    /**
  1106	     * Adds the specified range to this set if it is not already
  1107	     * present.  If this set already contains the specified range,
  1108	     * the call leaves this set unchanged.  If <code>start > end</code>
  1109	     * then an empty range is added, leaving the set unchanged.
  1110	     * This is equivalent to a boolean logic OR, or a set UNION.
  1111	     * A frozen set will not be modified.
  1112	     *
  1113	     * @param start first character, inclusive, of range to be added
  1114	     * to this set.
  1115	     * @param end last character, inclusive, of range to be added
  1116	     * to this set.
  1117	     * @stable ICU 2.0
  1118	     */
  1119	    virtual UnicodeSet& add(UChar32 start, UChar32 end);
  1120	
  1121	    /**
  1122	     * Adds the specified character to this set if it is not already
  1123	     * present.  If this set already contains the specified character,
  1124	     * the call leaves this set unchanged.
  1125	     * A frozen set will not be modified.
  1126	     *
  1127	     * @param c the character (code point)
  1128	     * @return this object, for chaining
  1129	     * @stable ICU 2.0
  1130	     */
  1131	    UnicodeSet& add(UChar32 c);
  1132	
  1133	    /**
  1134	     * Adds the specified multicharacter string to this set if it is not
  1135	     * already present.  If this set already contains the string,
  1136	     * the call leaves this set unchanged.
  1137	     * Thus "ch" => {"ch"}
  1138	     * A frozen set will not be modified.
  1139	     *
  1140	     * @param s the source string
  1141	     * @return this object, for chaining
  1142	     * @stable ICU 2.4
  1143	     */
  1144	    UnicodeSet& add(const UnicodeString& s);
  1145	
  1146	 private:
  1147	    /**
  1148	     * @return a code point IF the string consists of a single one.
  1149	     * otherwise returns -1.
  1150	     * @param s string to test
  1151	     */
  1152	    static int32_t getSingleCP(const UnicodeString& s);
  1153	
  1154	    void _add(const UnicodeString& s);
  1155	
  1156	 public:
  1157	    /**
  1158	     * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
  1159	     * If this set already contains any particular character, it has no effect on that character.
  1160	     * A frozen set will not be modified.
  1161	     * @param s the source string
  1162	     * @return this object, for chaining
  1163	     * @stable ICU 2.4
  1164	     */
  1165	    UnicodeSet& addAll(const UnicodeString& s);
  1166	
  1167	    /**
  1168	     * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
  1169	     * A frozen set will not be modified.
  1170	     * @param s the source string
  1171	     * @return this object, for chaining
  1172	     * @stable ICU 2.4
  1173	     */
  1174	    UnicodeSet& retainAll(const UnicodeString& s);
  1175	
  1176	    /**
  1177	     * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
  1178	     * A frozen set will not be modified.
  1179	     * @param s the source string
  1180	     * @return this object, for chaining
  1181	     * @stable ICU 2.4
  1182	     */
  1183	    UnicodeSet& complementAll(const UnicodeString& s);
  1184	
  1185	    /**
  1186	     * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
  1187	     * A frozen set will not be modified.
  1188	     * @param s the source string
  1189	     * @return this object, for chaining
  1190	     * @stable ICU 2.4
  1191	     */
  1192	    UnicodeSet& removeAll(const UnicodeString& s);
  1193	
  1194	    /**
  1195	     * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
  1196	     *
  1197	     * @param s the source string
  1198	     * @return a newly created set containing the given string.
  1199	     * The caller owns the return object and is responsible for deleting it.
  1200	     * @stable ICU 2.4
  1201	     */
  1202	    static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
  1203	
  1204	
  1205	    /**
  1206	     * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
  1207	     * @param s the source string
  1208	     * @return a newly created set containing the given characters
  1209	     * The caller owns the return object and is responsible for deleting it.
  1210	     * @stable ICU 2.4
  1211	     */
  1212	    static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
  1213	
  1214	    /**
  1215	     * Retain only the elements in this set that are contained in the
  1216	     * specified range.  If <code>start > end</code> then an empty range is
  1217	     * retained, leaving the set empty.  This is equivalent to
  1218	     * a boolean logic AND, or a set INTERSECTION.
  1219	     * A frozen set will not be modified.
  1220	     *
  1221	     * @param start first character, inclusive, of range
  1222	     * @param end last character, inclusive, of range
  1223	     * @stable ICU 2.0
  1224	     */
  1225	    virtual UnicodeSet& retain(UChar32 start, UChar32 end);
  1226	
  1227	
  1228	    /**
  1229	     * Retain the specified character from this set if it is present.
  1230	     * A frozen set will not be modified.
  1231	     *
  1232	     * @param c the character (code point)
  1233	     * @return this object, for chaining
  1234	     * @stable ICU 2.0
  1235	     */
  1236	    UnicodeSet& retain(UChar32 c);
  1237	
  1238	    /**
  1239	     * Retains only the specified string from this set if it is present.
  1240	     * Upon return this set will be empty if it did not contain s, or
  1241	     * will only contain s if it did contain s.
  1242	     * A frozen set will not be modified.
  1243	     *
  1244	     * @param s the source string
  1245	     * @return this object, for chaining
  1246	     * @stable ICU 69
  1247	     */
  1248	    UnicodeSet& retain(const UnicodeString &s);
  1249	
  1250	    /**
  1251	     * Removes the specified range from this set if it is present.
  1252	     * The set will not contain the specified range once the call
  1253	     * returns.  If <code>start > end</code> then an empty range is
  1254	     * removed, leaving the set unchanged.
  1255	     * A frozen set will not be modified.
  1256	     *
  1257	     * @param start first character, inclusive, of range to be removed
  1258	     * from this set.
  1259	     * @param end last character, inclusive, of range to be removed
  1260	     * from this set.
  1261	     * @stable ICU 2.0
  1262	     */
  1263	    virtual UnicodeSet& remove(UChar32 start, UChar32 end);
  1264	
  1265	    /**
  1266	     * Removes the specified character from this set if it is present.
  1267	     * The set will not contain the specified character once the call
  1268	     * returns.
  1269	     * A frozen set will not be modified.
  1270	     *
  1271	     * @param c the character (code point)
  1272	     * @return this object, for chaining
  1273	     * @stable ICU 2.0
  1274	     */
  1275	    UnicodeSet& remove(UChar32 c);
  1276	
  1277	    /**
  1278	     * Removes the specified string from this set if it is present.
  1279	     * The set will not contain the specified string once the call
  1280	     * returns.
  1281	     * A frozen set will not be modified.
  1282	     * @param s the source string
  1283	     * @return this object, for chaining
  1284	     * @stable ICU 2.4
  1285	     */
  1286	    UnicodeSet& remove(const UnicodeString& s);
  1287	
  1288	    /**
  1289	     * This is equivalent to
  1290	     * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
  1291	     *
  1292	     * <strong>Note:</strong> This performs a symmetric difference with all code points
  1293	     * <em>and thus retains all multicharacter strings</em>.
  1294	     * In order to achieve a “code point complement” (all code points minus this set),
  1295	     * the easiest is to <code>.complement().removeAllStrings()</code>.
  1296	     *
  1297	     * A frozen set will not be modified.
  1298	     * @stable ICU 2.0
  1299	     */
  1300	    virtual UnicodeSet& complement();
  1301	
  1302	    /**
  1303	     * Complements the specified range in this set.  Any character in
  1304	     * the range will be removed if it is in this set, or will be
  1305	     * added if it is not in this set.  If <code>start > end</code>
  1306	     * then an empty range is complemented, leaving the set unchanged.
  1307	     * This is equivalent to a boolean logic XOR.
  1308	     * A frozen set will not be modified.
  1309	     *
  1310	     * @param start first character, inclusive, of range
  1311	     * @param end last character, inclusive, of range
  1312	     * @stable ICU 2.0
  1313	     */
  1314	    virtual UnicodeSet& complement(UChar32 start, UChar32 end);
  1315	
  1316	    /**
  1317	     * Complements the specified character in this set.  The character
  1318	     * will be removed if it is in this set, or will be added if it is
  1319	     * not in this set.
  1320	     * A frozen set will not be modified.
  1321	     *
  1322	     * @param c the character (code point)
  1323	     * @return this object, for chaining
  1324	     * @stable ICU 2.0
  1325	     */
  1326	    UnicodeSet& complement(UChar32 c);
  1327	
  1328	    /**
  1329	     * Complement the specified string in this set.
  1330	     * The string will be removed if it is in this set, or will be added if it is not in this set.
  1331	     * A frozen set will not be modified.
  1332	     *
  1333	     * @param s the string to complement
  1334	     * @return this object, for chaining
  1335	     * @stable ICU 2.4
  1336	     */
  1337	    UnicodeSet& complement(const UnicodeString& s);
  1338	
  1339	    /**
  1340	     * Adds all of the elements in the specified set to this set if
  1341	     * they're not already present.  This operation effectively
  1342	     * modifies this set so that its value is the <i>union</i> of the two
  1343	     * sets.  The behavior of this operation is unspecified if the specified
  1344	     * collection is modified while the operation is in progress.
  1345	     * A frozen set will not be modified.
  1346	     *
  1347	     * @param c set whose elements are to be added to this set.
  1348	     * @see #add(UChar32, UChar32)
  1349	     * @stable ICU 2.0
  1350	     */
  1351	    virtual UnicodeSet& addAll(const UnicodeSet& c);
  1352	
  1353	    /**
  1354	     * Retains only the elements in this set that are contained in the
  1355	     * specified set.  In other words, removes from this set all of
  1356	     * its elements that are not contained in the specified set.  This
  1357	     * operation effectively modifies this set so that its value is
  1358	     * the <i>intersection</i> of the two sets.
  1359	     * A frozen set will not be modified.
  1360	     *
  1361	     * @param c set that defines which elements this set will retain.
  1362	     * @stable ICU 2.0
  1363	     */
  1364	    virtual UnicodeSet& retainAll(const UnicodeSet& c);
  1365	
  1366	    /**
  1367	     * Removes from this set all of its elements that are contained in the
  1368	     * specified set.  This operation effectively modifies this
  1369	     * set so that its value is the <i>asymmetric set difference</i> of
  1370	     * the two sets.
  1371	     * A frozen set will not be modified.
  1372	     *
  1373	     * @param c set that defines which elements will be removed from
  1374	     *          this set.
  1375	     * @stable ICU 2.0
  1376	     */
  1377	    virtual UnicodeSet& removeAll(const UnicodeSet& c);
  1378	
  1379	    /**
  1380	     * Complements in this set all elements contained in the specified
  1381	     * set.  Any character in the other set will be removed if it is
  1382	     * in this set, or will be added if it is not in this set.
  1383	     * A frozen set will not be modified.
  1384	     *
  1385	     * @param c set that defines which elements will be xor'ed with
  1386	     *          this set.
  1387	     * @stable ICU 2.4
  1388	     */
  1389	    virtual UnicodeSet& complementAll(const UnicodeSet& c);
  1390	
  1391	    /**
  1392	     * Removes all of the elements from this set.  This set will be
  1393	     * empty after this call returns.
  1394	     * A frozen set will not be modified.
  1395	     * @stable ICU 2.0
  1396	     */
  1397	    virtual UnicodeSet& clear(void);
  1398	
  1399	    /**
  1400	     * Close this set over the given attribute.  For the attribute
  1401	     * USET_CASE_INSENSITIVE, the result is to modify this set so that:
  1402	     *
  1403	     * 1. For each character or string 'a' in this set, all strings or
  1404	     * characters 'b' such that foldCase(a) == foldCase(b) are added
  1405	     * to this set.
  1406	     *
  1407	     * 2. For each string 'e' in the resulting set, if e !=
  1408	     * foldCase(e), 'e' will be removed.
  1409	     *
  1410	     * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
  1411	     *
  1412	     * (Here foldCase(x) refers to the operation u_strFoldCase, and a
  1413	     * == b denotes that the contents are the same, not pointer
  1414	     * comparison.)
  1415	     *
  1416	     * A frozen set will not be modified.
  1417	     *
  1418	     * @param attribute bitmask for attributes to close over.
  1419	     * Valid options:
  1420	     * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
  1421	     * These case options are mutually exclusive.
  1422	     * Unrelated options bits are ignored.
  1423	     * @return a reference to this set.
  1424	     * @stable ICU 4.2
  1425	     */
  1426	    UnicodeSet& closeOver(int32_t attribute);
  1427	
  1428	    /**
  1429	     * Remove all strings from this set.
  1430	     *
  1431	     * @return a reference to this set.
  1432	     * @stable ICU 4.2
  1433	     */
  1434	    virtual UnicodeSet &removeAllStrings();
  1435	
  1436	    /**
  1437	     * Iteration method that returns the number of ranges contained in
  1438	     * this set.
  1439	     * @see #getRangeStart
  1440	     * @see #getRangeEnd
  1441	     * @stable ICU 2.4
  1442	     */
  1443	    virtual int32_t getRangeCount(void) const;
  1444	
  1445	    /**
  1446	     * Iteration method that returns the first character in the
  1447	     * specified range of this set.
  1448	     * @see #getRangeCount
  1449	     * @see #getRangeEnd
  1450	     * @stable ICU 2.4
  1451	     */
  1452	    virtual UChar32 getRangeStart(int32_t index) const;
  1453	
  1454	    /**
  1455	     * Iteration method that returns the last character in the
  1456	     * specified range of this set.
  1457	     * @see #getRangeCount
  1458	     * @see #getRangeStart
  1459	     * @stable ICU 2.4
  1460	     */
  1461	    virtual UChar32 getRangeEnd(int32_t index) const;
  1462	
  1463	    /**
  1464	     * Serializes this set into an array of 16-bit integers.  Serialization
  1465	     * (currently) only records the characters in the set; multicharacter
  1466	     * strings are ignored.
  1467	     *
  1468	     * The array has the following format (each line is one 16-bit
  1469	     * integer):
  1470	     *
  1471	     *  length     = (n+2*m) | (m!=0?0x8000:0)
  1472	     *  bmpLength  = n; present if m!=0
  1473	     *  bmp[0]
  1474	     *  bmp[1]
  1475	     *  ...
  1476	     *  bmp[n-1]
  1477	     *  supp-high[0]
  1478	     *  supp-low[0]
  1479	     *  supp-high[1]
  1480	     *  supp-low[1]
  1481	     *  ...
  1482	     *  supp-high[m-1]
  1483	     *  supp-low[m-1]
  1484	     *
  1485	     * The array starts with a header.  After the header are n bmp
  1486	     * code points, then m supplementary code points.  Either n or m
  1487	     * or both may be zero.  n+2*m is always <= 0x7FFF.
  1488	     *
  1489	     * If there are no supplementary characters (if m==0) then the
  1490	     * header is one 16-bit integer, 'length', with value n.
  1491	     *
  1492	     * If there are supplementary characters (if m!=0) then the header
  1493	     * is two 16-bit integers.  The first, 'length', has value
  1494	     * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
  1495	     *
  1496	     * After the header the code points are stored in ascending order.
  1497	     * Supplementary code points are stored as most significant 16
  1498	     * bits followed by least significant 16 bits.
  1499	     *
  1500	     * @param dest pointer to buffer of destCapacity 16-bit integers.
  1501	     * May be nullptr only if destCapacity is zero.
  1502	     * @param destCapacity size of dest, or zero.  Must not be negative.
  1503	     * @param ec error code.  Will be set to U_INDEX_OUTOFBOUNDS_ERROR
  1504	     * if n+2*m > 0x7FFF.  Will be set to U_BUFFER_OVERFLOW_ERROR if
  1505	     * n+2*m+(m!=0?2:1) > destCapacity.
  1506	     * @return the total length of the serialized format, including
  1507	     * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
  1508	     * than U_BUFFER_OVERFLOW_ERROR.
  1509	     * @stable ICU 2.4
  1510	     */
  1511	    int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
  1512	
  1513	    /**
  1514	     * Reallocate this object's internal structures to take up the least
  1515	     * possible space, without changing this object's value.
  1516	     * A frozen set will not be modified.
  1517	     * @stable ICU 2.4
  1518	     */
  1519	    virtual UnicodeSet& compact();
  1520	
  1521	    /**
  1522	     * Return the class ID for this class.  This is useful only for
  1523	     * comparing to a return value from getDynamicClassID().  For example:
  1524	     * <pre>
  1525	     * .      Base* polymorphic_pointer = createPolymorphicObject();
  1526	     * .      if (polymorphic_pointer->getDynamicClassID() ==
  1527	     * .          Derived::getStaticClassID()) ...
  1528	     * </pre>
  1529	     * @return          The class ID for all objects of this class.
  1530	     * @stable ICU 2.0
  1531	     */
  1532	    static UClassID U_EXPORT2 getStaticClassID(void);
  1533	
  1534	    /**
  1535	     * Implement UnicodeFunctor API.
  1536	     *
  1537	     * @return The class ID for this object. All objects of a given
  1538	     * class have the same class ID.  Objects of other classes have
  1539	     * different class IDs.
  1540	     * @stable ICU 2.4
  1541	     */
  1542	    virtual UClassID getDynamicClassID(void) const override;
  1543	
  1544	private:
  1545	
  1546	    // Private API for the USet API
  1547	
  1548	    friend class USetAccess;
  1549	
  1550	    const UnicodeString* getString(int32_t index) const;
  1551	
  1552	    //----------------------------------------------------------------
  1553	    // RuleBasedTransliterator support
  1554	    //----------------------------------------------------------------
  1555	
  1556	private:
  1557	
  1558	    /**
  1559	     * Returns <tt>true</tt> if this set contains any character whose low byte
  1560	     * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
  1561	     * indexing.
  1562	     */
  1563	    virtual UBool matchesIndexValue(uint8_t v) const override;
  1564	
  1565	private:
  1566	    friend class RBBIRuleScanner;
  1567	
  1568	    //----------------------------------------------------------------
  1569	    // Implementation: Clone as thawed (see ICU4J Freezable)
  1570	    //----------------------------------------------------------------
  1571	
  1572	    UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
  1573	    UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);
  1574	
  1575	    //----------------------------------------------------------------
  1576	    // Implementation: Pattern parsing
  1577	    //----------------------------------------------------------------
  1578	
  1579	    void applyPatternIgnoreSpace(const UnicodeString& pattern,
  1580	                                 ParsePosition& pos,
  1581	                                 const SymbolTable* symbols,
  1582	                                 UErrorCode& status);
  1583	
  1584	    void applyPattern(RuleCharacterIterator& chars,
  1585	                      const SymbolTable* symbols,
  1586	                      UnicodeString& rebuiltPat,
  1587	                      uint32_t options,
  1588	                      UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
  1589	                      int32_t depth,
  1590	                      UErrorCode& ec);
  1591	
  1592	    void closeOverCaseInsensitive(bool simple);
  1593	    void closeOverAddCaseMappings();
  1594	
  1595	    //----------------------------------------------------------------
  1596	    // Implementation: Utility methods
  1597	    //----------------------------------------------------------------
  1598	
  1599	    static int32_t nextCapacity(int32_t minCapacity);
  1600	
  1601	    bool ensureCapacity(int32_t newLen);
  1602	
  1603	    bool ensureBufferCapacity(int32_t newLen);
  1604	
  1605	    void swapBuffers(void);
  1606	
  1607	    UBool allocateStrings(UErrorCode &status);
  1608	    int32_t stringsSize() const;
  1609	    UBool stringsContains(const UnicodeString &s) const;
  1610	
  1611	    UnicodeString& _toPattern(UnicodeString& result,
  1612	                              UBool escapeUnprintable) const;
  1613	
  1614	    UnicodeString& _generatePattern(UnicodeString& result,
  1615	                                    UBool escapeUnprintable) const;
  1616	
  1617	    static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
  1618	
  1619	    static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
  1620	
  1621	    static void _appendToPat(UnicodeString &result, UChar32 start, UChar32 end,
  1622	                             UBool escapeUnprintable);
  1623	
  1624	    //----------------------------------------------------------------
  1625	    // Implementation: Fundamental operators
  1626	    //----------------------------------------------------------------
  1627	
  1628	    void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
  1629	
  1630	    void add(const UChar32* other, int32_t otherLen, int8_t polarity);
  1631	
  1632	    void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
  1633	
  1634	    /**
  1635	     * Return true if the given position, in the given pattern, appears
  1636	     * to be the start of a property set pattern: [:foo:], \\p{foo},
  1637	     * \\P{foo}, or \\N{name}.
  1638	     */
  1639	    static UBool resemblesPropertyPattern(const UnicodeString& pattern,
  1640	                                          int32_t pos);
  1641	
  1642	    static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
  1643	                                          int32_t iterOpts);
  1644	
  1645	    /**
  1646	     * Parse the given property pattern at the given parse position
  1647	     * and set this UnicodeSet to the result.
  1648	     *
  1649	     * The original design document is out of date, but still useful.
  1650	     * Ignore the property and value names:
  1651	     * https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/unicodeset_properties.html
  1652	     *
  1653	     * Recognized syntax:
  1654	     *
  1655	     * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
  1656	     * \\p{foo} \\P{foo}  - white space not allowed within "\\p" or "\\P"
  1657	     * \\N{name}         - white space not allowed within "\\N"
  1658	     *
  1659	     * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
  1660	     * Case is ignored except in "\\p" and "\\P" and "\\N".  In 'name' leading
  1661	     * and trailing space is deleted, and internal runs of whitespace
  1662	     * are collapsed to a single space.
  1663	     *
  1664	     * We support binary properties, enumerated properties, and the
  1665	     * following non-enumerated properties:
  1666	     *
  1667	     *  Numeric_Value
  1668	     *  Name
  1669	     *  Unicode_1_Name
  1670	     *
  1671	     * @param pattern the pattern string
  1672	     * @param ppos on entry, the position at which to begin parsing.
  1673	     * This should be one of the locations marked '^':
  1674	     *
  1675	     *   [:blah:]     \\p{blah}     \\P{blah}     \\N{name}
  1676	     *   ^       %    ^       %    ^       %    ^       %
  1677	     *
  1678	     * On return, the position after the last character parsed, that is,
  1679	     * the locations marked '%'.  If the parse fails, ppos is returned
  1680	     * unchanged.
  1681	     * @param ec status
  1682	     * @return a reference to this.
  1683	     */
  1684	    UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
  1685	                                     ParsePosition& ppos,
  1686	                                     UErrorCode &ec);
  1687	
  1688	    void applyPropertyPattern(RuleCharacterIterator& chars,
  1689	                              UnicodeString& rebuiltPat,
  1690	                              UErrorCode& ec);
  1691	
  1692	    /**
  1693	     * A filter that returns true if the given code point should be
  1694	     * included in the UnicodeSet being constructed.
  1695	     */
  1696	    using Filter = UBool (*)(UChar32 codePoint, void* context);
  1697	
  1698	    /**
  1699	     * Given a filter, set this UnicodeSet to the code points
  1700	     * contained by that filter.  The filter MUST be
  1701	     * property-conformant.  That is, if it returns value v for one
  1702	     * code point, then it must return v for all affiliated code
  1703	     * points, as defined by the inclusions list.  See
  1704	     * getInclusions().
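  1705	     * NOTE(review): this line used to read "src is a UPropertySource value", but the signature below has no src parameter -- confirm and update.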
  1706	     */
  1707	    void applyFilter(Filter filter,
  1708	                     void* context,
  1709	                     const UnicodeSet* inclusions,
  1710	                     UErrorCode &status);
  1711	
  1712	    /**
  1713	     * Set the new pattern to cache.
  1714	     */
  1715	    void setPattern(const UnicodeString& newPat) {
  1716	        this->setPattern(newPat.getBuffer(), newPat.length());  // forward the string's buffer and length to the (pointer, length) overload
  1717	    }
  1718	    void setPattern(const char16_t *newPat, int32_t newPatLen);
  1719	    /**
  1720	     * Release existing cached pattern.
  1721	     */
  1722	    void releasePattern();
  1723	
  1724	    friend class UnicodeSetIterator;
  1725	};
  1726	
  1727	
  1728	
  1729	inline bool UnicodeSet::operator!=(const UnicodeSet& o) const {
  1730	    return !this->operator==(o);  // inequality is defined purely as the negation of operator==
  1731	}
  1732	
  1733	inline UBool UnicodeSet::isFrozen() const {
  1734	    return static_cast<UBool>(stringSpan != nullptr || bmpSet != nullptr);  // true when either bmpSet or stringSpan is non-null
  1735	}
  1736	
  1737	inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
  1738	    return static_cast<UBool>(!this->containsNone(start, end));  // "some" is the logical negation of containsNone for the same range
  1739	}
  1740	
  1741	inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
  1742	    return static_cast<UBool>(!this->containsNone(s));  // "some" is the logical negation of containsNone for the same set
  1743	}
  1744	
  1745	inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
  1746	    return static_cast<UBool>(!this->containsNone(s));  // "some" is the logical negation of containsNone for the same string
  1747	}
  1748	
  1749	inline UBool UnicodeSet::isBogus() const {
  1750	    return static_cast<UBool>(fFlags & kIsBogus);  // result is the kIsBogus bit(s) of fFlags, narrowed to UBool
  1751	}
  1752	
  1753	inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
  1754	    return reinterpret_cast<UnicodeSet *>(uset);  // the pointer is reinterpreted as-is; nothing is copied or converted
  1755	}
  1756	
  1757	inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
  1758	    return reinterpret_cast<const UnicodeSet *>(uset);  // const overload: same pointer reinterpretation, constness kept
  1759	}
  1760	
  1761	inline USet *UnicodeSet::toUSet() {
  1762	    return reinterpret_cast<USet *>(this);  // exposes this very object through a USet pointer; no new object is created
  1763	}
  1764	
  1765	inline const USet *UnicodeSet::toUSet() const {
  1766	    return reinterpret_cast<const USet *>(this);  // const overload: same pointer reinterpretation, constness kept
  1767	}
  1768	
  1769	inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
  1770	    // Pin the requested offset into [0, s.length()] so the pointer
  1771	    // arithmetic below never steps outside the string's buffer.
  1772	    const int32_t len = s.length();
  1773	    const int32_t from = start < 0 ? 0 : (start > len ? len : start);
  1774	    // The pointer-based overload is handed only the tail that begins at the
  1775	    // clamped offset; that offset is then added back onto its result.
  1776	    return from + span(s.getBuffer() + from, len - from, spanCondition);
  1777	}
  1778	
  1779	inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
  1780	    // Pin limit into [0, s.length()] so the pointer-based overload is never
  1781	    // handed a value that is negative or larger than the string's length.
  1782	    const int32_t len = s.length();
  1783	    const int32_t upTo = limit < 0 ? 0 : (limit > len ? len : limit);
  1784	    // Delegate to the (buffer, limit) overload from the start of the buffer;
  1785	    // its return value is passed through unchanged.
  1786	    return spanBack(s.getBuffer(), upTo, spanCondition);
  1787	}
  1788	
  1789	U_NAMESPACE_END
  1790	
  1791	#endif /* U_SHOW_CPLUSPLUS_API */
  1792	
  1793	#endif