Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/unicode/normalizer2.h


$ cat -n /usr/include/unicode/normalizer2.h

     1	// © 2016 and later: Unicode, Inc. and others.
     2	// License & terms of use: http://www.unicode.org/copyright.html
     3	/*
     4	*******************************************************************************
     5	*
     6	*   Copyright (C) 2009-2013, International Business Machines
     7	*   Corporation and others.  All Rights Reserved.
     8	*
     9	*******************************************************************************
    10	*   file name:  normalizer2.h
    11	*   encoding:   UTF-8
    12	*   tab size:   8 (not used)
    13	*   indentation:4
    14	*
    15	*   created on: 2009nov22
    16	*   created by: Markus W. Scherer
    17	*/
    18	
    19	#ifndef __NORMALIZER2_H__
    20	#define __NORMALIZER2_H__
    21	
    22	/**
    23	 * \file
    24	 * \brief C++ API: New API for Unicode Normalization.
    25	 */
    26	
    27	#include "unicode/utypes.h"
    28	
    29	#if U_SHOW_CPLUSPLUS_API
    30	
    31	#if !UCONFIG_NO_NORMALIZATION
    32	
    33	#include "unicode/stringpiece.h"
    34	#include "unicode/uniset.h"
    35	#include "unicode/unistr.h"
    36	#include "unicode/unorm2.h"
    37	
    38	U_NAMESPACE_BEGIN
    39	
    40	class ByteSink;
    41	
    42	/**
    43	 * Unicode normalization functionality for standard Unicode normalization or
    44	 * for using custom mapping tables.
    45	 * All instances of this class are unmodifiable/immutable.
    46	 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
    47	 * The Normalizer2 class is not intended for public subclassing.
    48	 *
    49	 * The primary functions are to produce a normalized string and to detect whether
    50	 * a string is already normalized.
    51	 * The most commonly used normalization forms are those defined in
    52	 * http://www.unicode.org/unicode/reports/tr15/
    53	 * However, this API supports additional normalization forms for specialized purposes.
    54	 * For example, NFKC_Casefold is provided via getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode)
    55	 * and can be used in implementations of UTS #46.
    56	 *
    57	 * Not only are the standard compose and decompose modes supplied,
    58	 * but additional modes are provided as documented in the UNormalization2Mode enum.
    59	 *
    60	 * Some of the functions in this class identify normalization boundaries.
    61	 * At a normalization boundary, the portions of the string
    62	 * before it and starting from it do not interact and can be handled independently.
    63	 *
    64	 * The spanQuickCheckYes() function stops at a normalization boundary.
    65	 * When the goal is a normalized string, then the text before the boundary
    66	 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
    67	 *
    68	 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
    69	 * a character is guaranteed to be at a normalization boundary,
    70	 * regardless of context.
    71	 * This is used for moving from one normalization boundary to the next
    72	 * or preceding boundary, and for performing iterative normalization.
    73	 *
    74	 * Iterative normalization is useful when only a small portion of a
    75	 * longer string needs to be processed.
    76	 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
    77	 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
    78	 * (to process only the substring for which sort key bytes are computed).
    79	 *
    80	 * The set of normalization boundaries returned by these functions may not be
    81	 * complete: There may be more boundaries that could be returned.
    82	 * Different functions may return different boundaries.
    83	 * @stable ICU 4.4
    84	 */
    85	class U_COMMON_API Normalizer2 : public UObject {
    86	public:
    87	    /**
    88	     * Destructor.
    89	     * @stable ICU 4.4
    90	     */
    91	    ~Normalizer2();
    92	
    93	    /**
    94	     * Returns a Normalizer2 instance for Unicode NFC normalization.
    95	     * Same as getInstance(nullptr, "nfc", UNORM2_COMPOSE, errorCode).
    96	     * Returns an unmodifiable singleton instance. Do not delete it.
    97	     * @param errorCode Standard ICU error code. Its input value must
    98	     *                  pass the U_SUCCESS() test, or else the function returns
    99	     *                  immediately. Check for U_FAILURE() on output or use with
   100	     *                  function chaining. (See User Guide for details.)
   101	     * @return the requested Normalizer2, if successful
   102	     * @stable ICU 49
   103	     */
   104	    static const Normalizer2 *
   105	    getNFCInstance(UErrorCode &errorCode);
   106	
   107	    /**
   108	     * Returns a Normalizer2 instance for Unicode NFD normalization.
   109	     * Same as getInstance(nullptr, "nfc", UNORM2_DECOMPOSE, errorCode).
   110	     * Returns an unmodifiable singleton instance. Do not delete it.
   111	     * @param errorCode Standard ICU error code. Its input value must
   112	     *                  pass the U_SUCCESS() test, or else the function returns
   113	     *                  immediately. Check for U_FAILURE() on output or use with
   114	     *                  function chaining. (See User Guide for details.)
   115	     * @return the requested Normalizer2, if successful
   116	     * @stable ICU 49
   117	     */
   118	    static const Normalizer2 *
   119	    getNFDInstance(UErrorCode &errorCode);
   120	
   121	    /**
   122	     * Returns a Normalizer2 instance for Unicode NFKC normalization.
   123	     * Same as getInstance(nullptr, "nfkc", UNORM2_COMPOSE, errorCode).
   124	     * Returns an unmodifiable singleton instance. Do not delete it.
   125	     * @param errorCode Standard ICU error code. Its input value must
   126	     *                  pass the U_SUCCESS() test, or else the function returns
   127	     *                  immediately. Check for U_FAILURE() on output or use with
   128	     *                  function chaining. (See User Guide for details.)
   129	     * @return the requested Normalizer2, if successful
   130	     * @stable ICU 49
   131	     */
   132	    static const Normalizer2 *
   133	    getNFKCInstance(UErrorCode &errorCode);
   134	
   135	    /**
   136	     * Returns a Normalizer2 instance for Unicode NFKD normalization.
   137	     * Same as getInstance(nullptr, "nfkc", UNORM2_DECOMPOSE, errorCode).
   138	     * Returns an unmodifiable singleton instance. Do not delete it.
   139	     * @param errorCode Standard ICU error code. Its input value must
   140	     *                  pass the U_SUCCESS() test, or else the function returns
   141	     *                  immediately. Check for U_FAILURE() on output or use with
   142	     *                  function chaining. (See User Guide for details.)
   143	     * @return the requested Normalizer2, if successful
   144	     * @stable ICU 49
   145	     */
   146	    static const Normalizer2 *
   147	    getNFKDInstance(UErrorCode &errorCode);
   148	
   149	    /**
   150	     * Returns a Normalizer2 instance for Unicode toNFKC_Casefold() normalization
   151	     * which is equivalent to applying the NFKC_Casefold mappings and then NFC.
   152	     * See https://www.unicode.org/reports/tr44/#NFKC_Casefold
   153	     *
   154	     * Same as getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode).
   155	     * Returns an unmodifiable singleton instance. Do not delete it.
   156	     * @param errorCode Standard ICU error code. Its input value must
   157	     *                  pass the U_SUCCESS() test, or else the function returns
   158	     *                  immediately. Check for U_FAILURE() on output or use with
   159	     *                  function chaining. (See User Guide for details.)
   160	     * @return the requested Normalizer2, if successful
   161	     * @stable ICU 49
   162	     */
   163	    static const Normalizer2 *
   164	    getNFKCCasefoldInstance(UErrorCode &errorCode);
   165	
   166	#ifndef U_HIDE_DRAFT_API
   167	    /**
   168	     * Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
   169	     * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
   170	     * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
   171	     *
   172	     * Same as getInstance(nullptr, "nfkc_scf", UNORM2_COMPOSE, errorCode).
   173	     * Returns an unmodifiable singleton instance. Do not delete it.
   174	     * @param errorCode Standard ICU error code. Its input value must
   175	     *                  pass the U_SUCCESS() test, or else the function returns
   176	     *                  immediately. Check for U_FAILURE() on output or use with
   177	     *                  function chaining. (See User Guide for details.)
   178	     * @return the requested Normalizer2, if successful
   179	     * @draft ICU 74
   180	     */
   181	    static const Normalizer2 *
   182	    getNFKCSimpleCasefoldInstance(UErrorCode &errorCode);
   183	#endif  // U_HIDE_DRAFT_API
   184	
   185	    /**
   186	     * Returns a Normalizer2 instance which uses the specified data file
   187	     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
   188	     * and which composes or decomposes text according to the specified mode.
   189	     * Returns an unmodifiable singleton instance. Do not delete it.
   190	     *
   191	     * Use packageName=nullptr for data files that are part of ICU's own data.
   192	     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
   193	     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
   194	     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
   195	     *
   196	     * @param packageName nullptr for ICU built-in data, otherwise application data package name
   197	     * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
   198	     * @param mode normalization mode (compose or decompose etc.)
   199	     * @param errorCode Standard ICU error code. Its input value must
   200	     *                  pass the U_SUCCESS() test, or else the function returns
   201	     *                  immediately. Check for U_FAILURE() on output or use with
   202	     *                  function chaining. (See User Guide for details.)
   203	     * @return the requested Normalizer2, if successful
   204	     * @stable ICU 4.4
   205	     */
   206	    static const Normalizer2 *
   207	    getInstance(const char *packageName,
   208	                const char *name,
   209	                UNormalization2Mode mode,
   210	                UErrorCode &errorCode);
   211	
   212	    /**
   213	     * Returns the normalized form of the source string as a new UnicodeString.
   214	     * Convenience wrapper around normalize(src, dest, errorCode) with a local destination string.
   215	     * @param src source string
   216	     * @param errorCode Standard ICU error code. Its input value must
   217	     *                  pass the U_SUCCESS() test, or else the function returns
   218	     *                  immediately. Check for U_FAILURE() on output or use with
   219	     *                  function chaining. (See User Guide for details.)
   220	     * @return normalized src, returned by value
   221	     */
   222	    UnicodeString
   223	    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
   224	        UnicodeString result;  // local destination, distinct from src as the 3-argument overload requires
   225	        normalize(src, result, errorCode);  // delegates to the pure virtual normalize(src, dest, errorCode)
   226	        return result;  // returned by value; the overload's returned reference is intentionally ignored
   227	    }
   228	    /**
   229	     * Writes the normalized form of the source string to the destination string
   230	     * (replacing its contents) and returns the destination string.
   231	     * The source and destination strings must be different objects.
   232	     * @param src source string
   233	     * @param dest destination string; its contents are replaced with normalized src
   234	     * @param errorCode Standard ICU error code. Its input value must
   235	     *                  pass the U_SUCCESS() test, or else the function returns
   236	     *                  immediately. Check for U_FAILURE() on output or use with
   237	     *                  function chaining. (See User Guide for details.)
   238	     * @return dest
   239	     * @stable ICU 4.4
   240	     */
   241	    virtual UnicodeString &
   242	    normalize(const UnicodeString &src,
   243	              UnicodeString &dest,
   244	              UErrorCode &errorCode) const = 0;
   245	
   246	    /**
   247	     * Normalizes a UTF-8 string and optionally records how source substrings
   248	     * relate to changed and unchanged result substrings.
   249	     *
   250	     * Implemented completely for all built-in modes except for FCD.
   251	     * The base class implementation converts to & from UTF-16 and does not support edits.
   252	     *
   253	     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
   254	     * @param src       Source UTF-8 string.
   255	     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
   256	     *                  sink.Flush() is called at the end.
   257	     * @param edits     Records edits for index mapping, working with styled text,
   258	     *                  and getting only changes (if any).
   259	     *                  The Edits contents are undefined if any error occurs.
   260	     *                  This function calls edits->reset() first unless
   261	     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
   262	     * @param errorCode Standard ICU error code. Its input value must
   263	     *                  pass the U_SUCCESS() test, or else the function returns
   264	     *                  immediately. Check for U_FAILURE() on output or use with
   265	     *                  function chaining. (See User Guide for details.)
   266	     * @stable ICU 60
   267	     */
   268	    virtual void
   269	    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
   270	                  Edits *edits, UErrorCode &errorCode) const;
   271	
   272	    /**
   273	     * Appends the normalized form of the second string to the first string
   274	     * (merging them at the boundary) and returns the first string.
   275	     * The result is normalized if the first string was normalized.
   276	     * The first and second strings must be different objects.
   277	     * @param first string, should be normalized
   278	     * @param second string, will be normalized
   279	     * @param errorCode Standard ICU error code. Its input value must
   280	     *                  pass the U_SUCCESS() test, or else the function returns
   281	     *                  immediately. Check for U_FAILURE() on output or use with
   282	     *                  function chaining. (See User Guide for details.)
   283	     * @return first
   284	     * @stable ICU 4.4
   285	     */
   286	    virtual UnicodeString &
   287	    normalizeSecondAndAppend(UnicodeString &first,
   288	                             const UnicodeString &second,
   289	                             UErrorCode &errorCode) const = 0;
   290	    /**
   291	     * Appends the second string to the first string
   292	     * (merging them at the boundary) and returns the first string.
   293	     * The result is normalized if both the strings were normalized.
   294	     * The first and second strings must be different objects.
   295	     * @param first string, should be normalized
   296	     * @param second string, should be normalized
   297	     * @param errorCode Standard ICU error code. Its input value must
   298	     *                  pass the U_SUCCESS() test, or else the function returns
   299	     *                  immediately. Check for U_FAILURE() on output or use with
   300	     *                  function chaining. (See User Guide for details.)
   301	     * @return first
   302	     * @stable ICU 4.4
   303	     */
   304	    virtual UnicodeString &
   305	    append(UnicodeString &first,
   306	           const UnicodeString &second,
   307	           UErrorCode &errorCode) const = 0;
   308	
   309	    /**
   310	     * Gets the decomposition mapping of c.
   311	     * Roughly equivalent to normalizing the String form of c
   312	     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster. Unlike that,
   313	     * this function returns false and does not write a string
   314	     * if c does not have a decomposition mapping in this instance's data.
   315	     * This function is independent of the mode of the Normalizer2.
   316	     * @param c code point
   317	     * @param decomposition String object which will be set to c's
   318	     *                      decomposition mapping, if there is one.
   319	     * @return true if c has a decomposition, otherwise false
   320	     * @stable ICU 4.6
   321	     */
   322	    virtual UBool
   323	    getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
   324	
   325	    /**
   326	     * Gets the raw decomposition mapping of c.
   327	     *
   328	     * This is similar to the getDecomposition() method but returns the
   329	     * raw decomposition mapping as specified in UnicodeData.txt or
   330	     * (for custom data) in the mapping files processed by the gennorm2 tool.
   331	     * By contrast, getDecomposition() returns the processed,
   332	     * recursively-decomposed version of this mapping.
   333	     *
   334	     * When used on a standard NFKC Normalizer2 instance,
   335	     * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
   336	     *
   337	     * When used on a standard NFC Normalizer2 instance,
   338	     * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
   339	     * in this case, the result contains either one or two code points (=1..4 char16_ts).
   340	     *
   341	     * This function is independent of the mode of the Normalizer2.
   342	     * The default implementation returns false.
   343	     * @param c code point
   344	     * @param decomposition String object which will be set to c's
   345	     *                      raw decomposition mapping, if there is one.
   346	     * @return true if c has a decomposition, otherwise false
   347	     * @stable ICU 49
   348	     */
   349	    virtual UBool
   350	    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
   351	
   352	    /**
   353	     * Performs pairwise composition of a & b and returns the composite if there is one.
   354	     *
   355	     * Returns a composite code point c only if c has a two-way mapping to a+b.
   356	     * In standard Unicode normalization, this means that
   357	     * c has a canonical decomposition to a+b
   358	     * and c does not have the Full_Composition_Exclusion property.
   359	     *
   360	     * This function is independent of the mode of the Normalizer2.
   361	     * The default implementation returns a negative value.
   362	     * @param a A (normalization starter) code point.
   363	     * @param b Another code point.
   364	     * @return The non-negative composite code point if there is one; otherwise a negative value.
   365	     * @stable ICU 49
   366	     */
   367	    virtual UChar32
   368	    composePair(UChar32 a, UChar32 b) const;
   369	
   370	    /**
   371	     * Gets the combining class of c.
   372	     * The default implementation returns 0
   373	     * but all standard implementations return the Unicode Canonical_Combining_Class value.
   374	     * @param c code point
   375	     * @return c's combining class
   376	     * @stable ICU 49
   377	     */
   378	    virtual uint8_t
   379	    getCombiningClass(UChar32 c) const;
   380	
   381	    /**
   382	     * Tests if the string is normalized.
   383	     * Internally, in cases where the quickCheck() method would return "maybe"
   384	     * (which is only possible for the two COMPOSE modes) this method
   385	     * resolves to "yes" or "no" to provide a definitive result,
   386	     * at the cost of doing more work in those cases.
   387	     * @param s input string
   388	     * @param errorCode Standard ICU error code. Its input value must
   389	     *                  pass the U_SUCCESS() test, or else the function returns
   390	     *                  immediately. Check for U_FAILURE() on output or use with
   391	     *                  function chaining. (See User Guide for details.)
   392	     * @return true if s is normalized
   393	     * @stable ICU 4.4
   394	     */
   395	    virtual UBool
   396	    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
   397	    /**
   398	     * Tests if the UTF-8 string is normalized.
   399	     * Internally, in cases where the quickCheck() method would return "maybe"
   400	     * (which is only possible for the two COMPOSE modes) this method
   401	     * resolves to "yes" or "no" to provide a definitive result,
   402	     * at the cost of doing more work in those cases.
   403	     *
   404	     * This works for all normalization modes.
   405	     * It is optimized for UTF-8 for all built-in modes except for FCD.
   406	     * The base class implementation converts to UTF-16 and calls isNormalized().
   407	     *
   408	     * @param s UTF-8 input string
   409	     * @param errorCode Standard ICU error code. Its input value must
   410	     *                  pass the U_SUCCESS() test, or else the function returns
   411	     *                  immediately. Check for U_FAILURE() on output or use with
   412	     *                  function chaining. (See User Guide for details.)
   413	     * @return true if s is normalized
   414	     * @stable ICU 60
   415	     */
   416	    virtual UBool
   417	    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
   418	
   419	
   420	    /**
   421	     * Tests if the string is normalized.
   422	     * For the two COMPOSE modes, the result could be "maybe" in cases that
   423	     * would take a little more work to resolve definitively.
   424	     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
   425	     * combination of quick check + normalization, to avoid
   426	     * re-checking the "yes" prefix.
   427	     * @param s input string
   428	     * @param errorCode Standard ICU error code. Its input value must
   429	     *                  pass the U_SUCCESS() test, or else the function returns
   430	     *                  immediately. Check for U_FAILURE() on output or use with
   431	     *                  function chaining. (See User Guide for details.)
   432	     * @return UNormalizationCheckResult
   433	     * @stable ICU 4.4
   434	     */
   435	    virtual UNormalizationCheckResult
   436	    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
   437	
   438	    /**
   439	     * Returns the end of the normalized substring of the input string.
   440	     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
   441	     * the substring <code>UnicodeString(s, 0, end)</code>
   442	     * will pass the quick check with a "yes" result.
   443	     *
   444	     * The returned end index is usually one or more characters before the
   445	     * "no" or "maybe" character: The end index is at a normalization boundary.
   446	     * (See the class documentation for more about normalization boundaries.)
   447	     *
   448	     * When the goal is a normalized string and most input strings are expected
   449	     * to be normalized already, then call this method,
   450	     * and if it returns a prefix shorter than the input string,
   451	     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
   452	     * @param s input string
   453	     * @param errorCode Standard ICU error code. Its input value must
   454	     *                  pass the U_SUCCESS() test, or else the function returns
   455	     *                  immediately. Check for U_FAILURE() on output or use with
   456	     *                  function chaining. (See User Guide for details.)
   457	     * @return "yes" span end index
   458	     * @stable ICU 4.4
   459	     */
   460	    virtual int32_t
   461	    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
   462	
   463	    /**
   464	     * Tests if the character always has a normalization boundary before it,
   465	     * regardless of context.
   466	     * If true, then the character does not normalization-interact with
   467	     * preceding characters.
   468	     * In other words, a string containing this character can be normalized
   469	     * by processing portions before this character and starting from this
   470	     * character independently.
   471	     * This is used for iterative normalization. See the class documentation for details.
   472	     * @param c character to test
   473	     * @return true if c has a normalization boundary before it
   474	     * @stable ICU 4.4
   475	     */
   476	    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
   477	
   478	    /**
   479	     * Tests if the character always has a normalization boundary after it,
   480	     * regardless of context.
   481	     * If true, then the character does not normalization-interact with
   482	     * following characters.
   483	     * In other words, a string containing this character can be normalized
   484	     * by processing portions up to this character and after this
   485	     * character independently.
   486	     * This is used for iterative normalization. See the class documentation for details.
   487	     * Note that this operation may be significantly slower than hasBoundaryBefore().
   488	     * @param c character to test
   489	     * @return true if c has a normalization boundary after it
   490	     * @stable ICU 4.4
   491	     */
   492	    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
   493	
   494	    /**
   495	     * Tests if the character is normalization-inert.
   496	     * If true, then the character neither changes nor normalization-interacts with
   497	     * preceding or following characters.
   498	     * In other words, a string containing this character can be normalized
   499	     * by processing portions before this character and after this
   500	     * character independently.
   501	     * This is used for iterative normalization. See the class documentation for details.
   502	     * Note that this operation may be significantly slower than hasBoundaryBefore().
   503	     * @param c character to test
   504	     * @return true if c is normalization-inert
   505	     * @stable ICU 4.4
   506	     */
   507	    virtual UBool isInert(UChar32 c) const = 0;
   508	};
   509	
   510	/**
   511	 * Normalization filtered by a UnicodeSet.
   512	 * Normalizes portions of the text contained in the filter set and leaves
   513	 * portions not contained in the filter set unchanged.
   514	 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
   515	 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
   516	 * This class implements all of (and only) the Normalizer2 API.
   517	 * An instance of this class is unmodifiable/immutable but is constructed and
   518	 * must be destructed by the owner.
   519	 * @stable ICU 4.4
   520	 */
   521	class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
   522	public:
   523	    /**
   524	     * Constructs a filtered normalizer wrapping any Normalizer2 instance
   525	     * and a filter set.
   526	     * Both are aliased and must not be modified or deleted while this object
   527	     * is used.
   528	     * The filter set should be frozen; otherwise the performance will suffer greatly.
   529	     * @param n2 wrapped Normalizer2 instance
   530	     * @param filterSet UnicodeSet which determines the characters to be normalized
   531	     * @stable ICU 4.4
   532	     */
   533	    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
   534	            norm2(n2), set(filterSet) {}
   535	
   536	    /**
   537	     * Destructor.
   538	     * @stable ICU 4.4
   539	     */
   540	    ~FilteredNormalizer2();
   541	
   542	    /**
   543	     * Writes the normalized form of the source string to the destination string
   544	     * (replacing its contents) and returns the destination string.
   545	     * The source and destination strings must be different objects.
   546	     * @param src source string
   547	     * @param dest destination string; its contents are replaced with normalized src
   548	     * @param errorCode Standard ICU error code. Its input value must
   549	     *                  pass the U_SUCCESS() test, or else the function returns
   550	     *                  immediately. Check for U_FAILURE() on output or use with
   551	     *                  function chaining. (See User Guide for details.)
   552	     * @return dest
   553	     * @stable ICU 4.4
   554	     */
   555	    virtual UnicodeString &
   556	    normalize(const UnicodeString &src,
   557	              UnicodeString &dest,
   558	              UErrorCode &errorCode) const override;
   559	
   560	    /**
   561	     * Normalizes a UTF-8 string and optionally records how source substrings
   562	     * relate to changed and unchanged result substrings.
   563	     *
   564	     * Implemented completely for most built-in modes except for FCD.
   565	     * The base class implementation converts to & from UTF-16 and does not support edits.
   566	     *
   567	     * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
   568	     * @param src       Source UTF-8 string.
   569	     * @param sink      A ByteSink to which the normalized UTF-8 result string is written.
   570	     *                  sink.Flush() is called at the end.
   571	     * @param edits     Records edits for index mapping, working with styled text,
   572	     *                  and getting only changes (if any).
   573	     *                  The Edits contents are undefined if any error occurs.
   574	     *                  This function calls edits->reset() first unless
   575	     *                  options includes U_EDITS_NO_RESET. edits can be nullptr.
   576	     * @param errorCode Standard ICU error code. Its input value must
   577	     *                  pass the U_SUCCESS() test, or else the function returns
   578	     *                  immediately. Check for U_FAILURE() on output or use with
   579	     *                  function chaining. (See User Guide for details.)
   580	     * @stable ICU 60
   581	     */
   582	    virtual void
   583	    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
   584	                  Edits *edits, UErrorCode &errorCode) const override;
   585	
   586	    /**
   587	     * Appends the normalized form of the second string to the first string
   588	     * (merging them at the boundary) and returns the first string.
   589	     * The result is normalized if the first string was normalized.
   590	     * The first and second strings must be different objects.
   591	     * @param first     string that is appended to (modified in place); should be normalized
   592	     * @param second    string whose normalized form is appended; will be normalized
   593	     * @param errorCode Standard ICU error code. Its input value must
   594	     *                  pass the U_SUCCESS() test, or else the function returns
   595	     *                  immediately. Check for U_FAILURE() on output or use with
   596	     *                  function chaining. (See User Guide for details.)
   597	     * @return first (the string that was appended to)
   598	     * @stable ICU 4.4
   599	     */
   600	    virtual UnicodeString &
   601	    normalizeSecondAndAppend(UnicodeString &first,
   602	                             const UnicodeString &second,
   603	                             UErrorCode &errorCode) const override;
   604	    /**
   605	     * Appends the second string to the first string
   606	     * (merging them at the boundary) and returns the first string.
   607	     * The result is normalized if both strings were normalized.
   608	     * The first and second strings must be different objects.
   609	     * @param first     string that is appended to (modified in place); should be normalized
   610	     * @param second    string to append; should be normalized
   611	     * @param errorCode Standard ICU error code. Its input value must
   612	     *                  pass the U_SUCCESS() test, or else the function returns
   613	     *                  immediately. Check for U_FAILURE() on output or use with
   614	     *                  function chaining. (See User Guide for details.)
   615	     * @return first (the string that was appended to)
   616	     * @stable ICU 4.4
   617	     */
   618	    virtual UnicodeString &
   619	    append(UnicodeString &first,
   620	           const UnicodeString &second,
   621	           UErrorCode &errorCode) const override;
   622	
   623	    /**
   624	     * Gets the decomposition mapping of c.
   625	     * For details see the Normalizer2 base class documentation.
   626	     *
   627	     * This function is independent of the mode of the Normalizer2.
   628	     * @param c code point
   629	     * @param decomposition String object which will be set to c's
   630	     *                      decomposition mapping, if there is one.
   631	     * @return true if c has a decomposition, otherwise false
   632	     * @stable ICU 4.6
   633	     */
   634	    virtual UBool
   635	    getDecomposition(UChar32 c, UnicodeString &decomposition) const override;
   636	
   637	    /**
   638	     * Gets the raw decomposition mapping of c.
   639	     * For details see the Normalizer2 base class documentation.
   640	     *
   641	     * This function is independent of the mode of the Normalizer2.
   642	     * @param c code point
   643	     * @param decomposition String object which will be set to c's
   644	     *                      raw decomposition mapping, if there is one.
   645	     * @return true if c has a raw decomposition, otherwise false
   646	     * @stable ICU 49
   647	     */
   648	    virtual UBool
   649	    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override;
   650	
   651	    /**
   652	     * Performs pairwise composition of a and b and returns the composite if there is one.
   653	     * For details see the Normalizer2 base class documentation.
   654	     *
   655	     * This function is independent of the mode of the Normalizer2.
   656	     * @param a A (normalization starter) code point.
   657	     * @param b Another code point.
   658	     * @return The non-negative composite code point if there is one; otherwise a negative value.
   659	     * @stable ICU 49
   660	     */
   661	    virtual UChar32
   662	    composePair(UChar32 a, UChar32 b) const override;
   663	
   664	    /**
   665	     * Gets the combining class of c.
   666	     * The default implementation returns 0,
   667	     * but all standard implementations return the Unicode Canonical_Combining_Class value.
   668	     * @param c code point whose combining class is requested
   669	     * @return c's combining class
   670	     * @stable ICU 49
   671	     */
   672	    virtual uint8_t
   673	    getCombiningClass(UChar32 c) const override;
   674	
   675	    /**
   676	     * Tests if the string is normalized.
   677	     * For details see the Normalizer2 base class documentation.
   678	     * @param s input string
   679	     * @param errorCode Standard ICU error code. Its input value must
   680	     *                  pass the U_SUCCESS() test, or else the function returns
   681	     *                  immediately. Check for U_FAILURE() on output or use with
   682	     *                  function chaining. (See User Guide for details.)
   683	     * @return true if s is normalized, otherwise false
   684	     * @stable ICU 4.4
   685	     */
   686	    virtual UBool
   687	    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override;
   688	    /**
   689	     * Tests if the UTF-8 string is normalized.
   690	     * Internally, in cases where the quickCheck() method would return "maybe"
   691	     * (which is only possible for the two COMPOSE modes), this method
   692	     * resolves to "yes" or "no" to provide a definitive result,
   693	     * at the cost of doing more work in those cases.
   694	     *
   695	     * This works for all normalization modes.
   696	     * It is optimized for UTF-8 for all built-in modes except for FCD.
   697	     * The base class implementation converts to UTF-16 and calls isNormalized().
   698	     *
   699	     * @param s UTF-8 input string
   700	     * @param errorCode Standard ICU error code. Its input value must
   701	     *                  pass the U_SUCCESS() test, or else the function returns
   702	     *                  immediately. Check for U_FAILURE() on output or use with
   703	     *                  function chaining. (See User Guide for details.)
   704	     * @return true if s is normalized, otherwise false
   705	     * @stable ICU 60
   706	     */
   707	    virtual UBool
   708	    isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const override;
   709	    /**
   710	     * Quick-checks whether the string is normalized; the result can be inconclusive ("maybe").
   711	     * For details see the Normalizer2 base class documentation.
   712	     * @param s input string
   713	     * @param errorCode Standard ICU error code. Its input value must
   714	     *                  pass the U_SUCCESS() test, or else the function returns
   715	     *                  immediately. Check for U_FAILURE() on output or use with
   716	     *                  function chaining. (See User Guide for details.)
   717	     * @return UNormalizationCheckResult ("maybe" is only possible for the two COMPOSE modes)
   718	     * @stable ICU 4.4
   719	     */
   720	    virtual UNormalizationCheckResult
   721	    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override;
   722	    /**
   723	     * Returns the end of the normalized substring of the input string.
   724	     * For details see the Normalizer2 base class documentation.
   725	     * @param s input string
   726	     * @param errorCode Standard ICU error code. Its input value must
   727	     *                  pass the U_SUCCESS() test, or else the function returns
   728	     *                  immediately. Check for U_FAILURE() on output or use with
   729	     *                  function chaining. (See User Guide for details.)
   730	     * @return end index of the quick-check "yes" span
   731	     * @stable ICU 4.4
   732	     */
   733	    virtual int32_t
   734	    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override;
   735	
   736	    /**
   737	     * Tests if the character always has a normalization boundary before it,
   738	     * regardless of context.
   739	     * For details see the Normalizer2 base class documentation.
   740	     * @param c code point to test
   741	     * @return true if c has a normalization boundary before it, otherwise false
   742	     * @stable ICU 4.4
   743	     */
   744	    virtual UBool hasBoundaryBefore(UChar32 c) const override;
   745	
   746	    /**
   747	     * Tests if the character always has a normalization boundary after it,
   748	     * regardless of context.
   749	     * For details see the Normalizer2 base class documentation.
   750	     * @param c code point to test
   751	     * @return true if c has a normalization boundary after it, otherwise false
   752	     * @stable ICU 4.4
   753	     */
   754	    virtual UBool hasBoundaryAfter(UChar32 c) const override;
   755	
   756	    /**
   757	     * Tests if the character is normalization-inert.
   758	     * For details see the Normalizer2 base class documentation.
   759	     * @param c code point to test
   760	     * @return true if c is normalization-inert, otherwise false
   761	     * @stable ICU 4.4
   762	     */
   763	    virtual UBool isInert(UChar32 c) const override;
   764	private:
   765	    UnicodeString &  // private helper (not an override); returns a UnicodeString reference
   766	    normalize(const UnicodeString &src,  // input, read-only
   767	              UnicodeString &dest,  // NOTE(review): presumably receives the result - confirm in the .cpp
   768	              USetSpanCondition spanCondition,  // NOTE(review): presumably the initial span condition over `set` - confirm
   769	              UErrorCode &errorCode) const;
   770	
   771	    void  // private helper (not an override): pointer+length variant with an extra span condition
   772	    normalizeUTF8(uint32_t options, const char *src, int32_t length,  // NOTE(review): length presumably counts bytes of src - confirm
   773	                  ByteSink &sink, Edits *edits,
   774	                  USetSpanCondition spanCondition,  // NOTE(review): presumably the initial span condition over `set` - confirm
   775	                  UErrorCode &errorCode) const;
   776	
   777	    UnicodeString &  // private helper (not an override); same parameters as the public method plus doNormalize
   778	    normalizeSecondAndAppend(UnicodeString &first,
   779	                             const UnicodeString &second,
   780	                             UBool doNormalize,  // NOTE(review): presumably true = normalizeSecondAndAppend(), false = append() - confirm
   781	                             UErrorCode &errorCode) const;
   782	
   783	    const Normalizer2 &norm2;  // NOTE(review): presumably the wrapped normalizer; held by reference - confirm lifetime in constructor docs
   784	    const UnicodeSet &set;  // NOTE(review): presumably the filter set; held by reference - confirm lifetime in constructor docs
   785	};
   786	
   787	U_NAMESPACE_END
   788	
   789	#endif  // !UCONFIG_NO_NORMALIZATION
   790	
   791	#endif /* U_SHOW_CPLUSPLUS_API */
   792	
   793	#endif  // __NORMALIZER2_H__