Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/unicode/normlzr.h


$ cat -n /usr/include/unicode/normlzr.h

     1	// © 2016 and later: Unicode, Inc. and others.
     2	// License & terms of use: http://www.unicode.org/copyright.html
     3	/*
     4	 ********************************************************************
     5	 * COPYRIGHT:
     6	 * Copyright (c) 1996-2015, International Business Machines Corporation and
     7	 * others. All Rights Reserved.
     8	 ********************************************************************
     9	 */
    10	
    11	#ifndef NORMLZR_H
    12	#define NORMLZR_H
    13	
    14	#include "unicode/utypes.h"
    15	
    16	#if U_SHOW_CPLUSPLUS_API
    17	
    18	/**
    19	 * \file 
    20	 * \brief C++ API: Unicode Normalization
    21	 */
    22	 
    23	#if !UCONFIG_NO_NORMALIZATION
    24	
    25	#include "unicode/chariter.h"
    26	#include "unicode/normalizer2.h"
    27	#include "unicode/unistr.h"
    28	#include "unicode/unorm.h"
    29	#include "unicode/uobject.h"
    30	
    31	U_NAMESPACE_BEGIN
    32	/**
    33	 * Old Unicode normalization API.
    34	 *
    35	 * This API has been replaced by the Normalizer2 class and is only available
    36	 * for backward compatibility. This class simply delegates to the Normalizer2 class.
    37	 * There is one exception: The new API does not provide a replacement for Normalizer::compare().
    38	 *
    39	 * The Normalizer class supports the standard normalization forms described in
    40	 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
    41	 * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
    42	 *
    43	 * The Normalizer class consists of two parts:
    44	 * - static functions that normalize strings or test if strings are normalized
    45	 * - a Normalizer object is an iterator that takes any kind of text and
    46	 *   provides iteration over its normalized form
    47	 *
    48	 * The Normalizer class is not suitable for subclassing.
    49	 *
    50	 * For basic information about normalization forms and details about the C API
    51	 * please see the documentation in unorm.h.
    52	 *
    53	 * The iterator API with the Normalizer constructors and the non-static functions
    54	 * use a CharacterIterator as input. It is possible to pass a string which
    55	 * is then internally wrapped in a CharacterIterator.
    56	 * The input text is not normalized all at once, but incrementally where needed
    57	 * (providing efficient random access).
    58	 * This allows passing in a large text but spending only a small amount of time
    59	 * normalizing a small part of that text.
    60	 * However, if the entire text is normalized, then the iterator will be
    61	 * slower than normalizing the entire text at once and iterating over the result.
    62	 * A possible use of the Normalizer iterator is also to report an index into the
    63	 * original text that is close to where the normalized characters come from.
    64	 *
    65	 * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
    66	 * The earlier implementation reported the getIndex() inconsistently,
    67	 * and previous() could not be used after setIndex(), next(), first(), and current().
    68	 *
    69	 * Normalizer allows normalization to start from anywhere in the input text by
    70	 * calling setIndexOnly(), first(), or last().
    71	 * Without calling any of these, the iterator will start at the beginning of the text.
    72	 *
    73	 * At any time, next() returns the next normalized code point (UChar32),
    74	 * with post-increment semantics (like CharacterIterator::next32PostInc()).
    75	 * previous() returns the previous normalized code point (UChar32),
    76	 * with pre-decrement semantics (like CharacterIterator::previous32()).
    77	 *
    78	 * current() returns the current code point
    79	 * (respectively the one at the newly set index) without moving
    80	 * the getIndex(). Note that if the text at the current position
    81	 * needs to be normalized, then these functions will do that.
    82	 * (This is why current() is not const.)
    83	 * It is more efficient to call setIndexOnly() instead, which does not
    84	 * normalize.
    85	 *
    86	 * getIndex() always refers to the position in the input text where the normalized
    87	 * code points are returned from. It does not always change with each returned
    88	 * code point.
    89	 * The code point that is returned from any of the functions
    90	 * corresponds to text at or after getIndex(), according to the
    91	 * function's iteration semantics (post-increment or pre-decrement).
    92	 *
    93	 * next() returns a code point from at or after the getIndex()
    94	 * from before the next() call. After the next() call, the getIndex()
    95	 * might have moved to where the next code point will be returned from
    96	 * (from a next() or current() call).
    97	 * This is semantically equivalent to array access with array[index++]
    98	 * (post-increment semantics).
    99	 *
   100	 * previous() returns a code point from at or after the getIndex()
   101	 * from after the previous() call.
   102	 * This is semantically equivalent to array access with array[--index]
   103	 * (pre-decrement semantics).
   104	 *
   105	 * Internally, the Normalizer iterator normalizes a small piece of text
   106	 * starting at the getIndex() and ending at a following "safe" index.
   107	 * The normalized result is stored in an internal string buffer, and
   108	 * the code points are iterated from there.
   109	 * With multiple iteration calls, this is repeated until the next piece
   110	 * of text needs to be normalized, and the getIndex() needs to be moved.
   111	 *
   112	 * The following "safe" index, the internal buffer, and the secondary
   113	 * iteration index into that buffer are not exposed on the API.
   114	 * This also means that it is currently not practical to return to
   115	 * a particular, arbitrary position in the text because one would need to
   116	 * know, and be able to set, in addition to the getIndex(), at least also the
   117	 * current index into the internal buffer.
   118	 * It is currently only possible to observe when getIndex() changes
   119	 * (with careful consideration of the iteration semantics),
   120	 * at which time the internal index will be 0.
   121	 * For example, if getIndex() is different after next() than before it,
   122	 * then the internal index is 0 and one can return to this getIndex()
   123	 * later with setIndexOnly().
   124	 *
   125	 * Note: While the setIndex() and getIndex() refer to indices in the
   126	 * underlying Unicode input text, the next() and previous() methods
   127	 * iterate through characters in the normalized output.
   128	 * This means that there is not necessarily a one-to-one correspondence
   129	 * between characters returned by next() and previous() and the indices
   130	 * passed to and returned from setIndex() and getIndex().
   131	 * It is for this reason that Normalizer does not implement the CharacterIterator interface.
   132	 *
   133	 * @author Laura Werner, Mark Davis, Markus Scherer
   134	 * @stable ICU 2.0
   135	 */
   136	class U_COMMON_API Normalizer : public UObject {
   137	public:
   138	#ifndef U_HIDE_DEPRECATED_API
   139	  /**
   140	   * If DONE is returned from an iteration function that returns a code point,
   141	   * then there are no more normalization results available.
   142	   * @deprecated ICU 56 Use Normalizer2 instead.
   143	   */
   144	  enum {
   145	      DONE=0xffff
   146	  };
   147	
   148	  // Constructors
   149	
   150	  /**
   151	   * Creates a new <code>Normalizer</code> object for iterating over the
   152	   * normalized form of a given string.
   153	   * <p>
   154	   * @param str   The string to be normalized.  The normalization
   155	   *              will start at the beginning of the string.
   156	   *
   157	   * @param mode  The normalization mode.
   158	   * @deprecated ICU 56 Use Normalizer2 instead.
   159	   */
   160	  Normalizer(const UnicodeString& str, UNormalizationMode mode);
   161	
   162	  /**
   163	   * Creates a new <code>Normalizer</code> object for iterating over the
   164	   * normalized form of a given string.
   165	   * <p>
   166	   * @param str   The string to be normalized.  The normalization
   167	   *              will start at the beginning of the string.
   168	   *
   169	   * @param length Length of the string, or -1 if NUL-terminated.
   170	   * @param mode  The normalization mode.
   171	   * @deprecated ICU 56 Use Normalizer2 instead.
   172	   */
   173	  Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
   174	
   175	  /**
   176	   * Creates a new <code>Normalizer</code> object for iterating over the
   177	   * normalized form of the given text.
   178	   * <p>
   179	   * @param iter  The input text to be normalized.  The normalization
   180	   *              will start at the beginning of the string.
   181	   *
   182	   * @param mode  The normalization mode.
   183	   * @deprecated ICU 56 Use Normalizer2 instead.
   184	   */
   185	  Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
   186	#endif  /* U_HIDE_DEPRECATED_API */
   187	
   188	#ifndef U_FORCE_HIDE_DEPRECATED_API
   189	  /**
   190	   * Copy constructor.
   191	   * @param copy The object to be copied.
   192	   * @deprecated ICU 56 Use Normalizer2 instead.
   193	   */
   194	  Normalizer(const Normalizer& copy);
   195	
   196	  /**
   197	   * Destructor
   198	   * @deprecated ICU 56 Use Normalizer2 instead.
   199	   */
   200	  virtual ~Normalizer();
   201	#endif  // U_FORCE_HIDE_DEPRECATED_API
   202	
   203	  //-------------------------------------------------------------------------
   204	  // Static utility methods
   205	  //-------------------------------------------------------------------------
   206	
   207	#ifndef U_HIDE_DEPRECATED_API
   208	  /**
   209	   * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
   210	   * This is a wrapper for unorm_normalize(), using UnicodeString's.
   211	   *
   212	   * The <code>options</code> parameter specifies which optional
   213	   * <code>Normalizer</code> features are to be enabled for this operation.
   214	   *
   215	   * @param source    the input string to be normalized.
   216	   * @param mode      the normalization mode
   217	   * @param options   the optional features to be enabled (0 for no options)
   218	   * @param result    The normalized string (on output).
   219	   * @param status    The error code.
   220	   * @deprecated ICU 56 Use Normalizer2 instead.
   221	   */
   222	  static void U_EXPORT2 normalize(const UnicodeString& source,
   223	                        UNormalizationMode mode, int32_t options,
   224	                        UnicodeString& result,
   225	                        UErrorCode &status);
   226	
   227	  /**
   228	   * Compose a <code>UnicodeString</code>.
   229	   * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
   230	   * This is a wrapper for unorm_normalize(), using UnicodeString's.
   231	   *
   232	   * The <code>options</code> parameter specifies which optional
   233	   * <code>Normalizer</code> features are to be enabled for this operation.
   234	   *
   235	   * @param source    the string to be composed.
   236	   * @param compat    Perform compatibility decomposition before composition.
   237	   *                  If this argument is <code>false</code>, only canonical
   238	   *                  decomposition will be performed.
   239	   * @param options   the optional features to be enabled (0 for no options)
   240	   * @param result    The composed string (on output).
   241	   * @param status    The error code.
   242	   * @deprecated ICU 56 Use Normalizer2 instead.
   243	   */
   244	  static void U_EXPORT2 compose(const UnicodeString& source,
   245	                      UBool compat, int32_t options,
   246	                      UnicodeString& result,
   247	                      UErrorCode &status);
   248	
   249	  /**
   250	   * Static method to decompose a <code>UnicodeString</code>.
   251	   * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
   252	   * This is a wrapper for unorm_normalize(), using UnicodeString's.
   253	   *
   254	   * The <code>options</code> parameter specifies which optional
   255	   * <code>Normalizer</code> features are to be enabled for this operation.
   256	   *
   257	   * @param source    the string to be decomposed.
   258	   * @param compat    Perform compatibility decomposition.
   259	   *                  If this argument is <code>false</code>, only canonical
   260	   *                  decomposition will be performed.
   261	   * @param options   the optional features to be enabled (0 for no options)
   262	   * @param result    The decomposed string (on output).
   263	   * @param status    The error code.
   264	   * @deprecated ICU 56 Use Normalizer2 instead.
   265	   */
   266	  static void U_EXPORT2 decompose(const UnicodeString& source,
   267	                        UBool compat, int32_t options,
   268	                        UnicodeString& result,
   269	                        UErrorCode &status);
   270	
   271	  /**
   272	   * Performs a quick check on a string to determine whether the string is
   273	   * in a particular normalization format.
   274	   * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
   275	   *
   276	   * Three types of result can be returned: UNORM_YES, UNORM_NO or
   277	   * UNORM_MAYBE. Result UNORM_YES indicates that the argument
   278	   * string is in the desired normalized format; UNORM_NO indicates that the
   279	   * argument string is not in the desired normalized format. A
   280	   * UNORM_MAYBE result indicates that a more thorough check is required;
   281	   * the user may have to put the string in its normalized form and compare the
   282	   * results.
   283	   * @param source       string for determining if it is in a normalized format
   284	   * @param mode         normalization format
   285	   * @param status A reference to a UErrorCode to receive any errors
   286	   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
   287	   *
   288	   * @see isNormalized
   289	   * @deprecated ICU 56 Use Normalizer2 instead.
   290	   */
   291	  static inline UNormalizationCheckResult
   292	  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
   293	
   294	  /**
   295	   * Performs a quick check on a string; same as the other version of quickCheck
   296	   * but takes an extra options parameter like most normalization functions.
   297	   *
   298	   * @param source       string for determining if it is in a normalized format
   299	   * @param mode         normalization format
   300	   * @param options      the optional features to be enabled (0 for no options)
   301	   * @param status A reference to a UErrorCode to receive any errors
   302	   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
   303	   *
   304	   * @see isNormalized
   305	   * @deprecated ICU 56 Use Normalizer2 instead.
   306	   */
   307	  static UNormalizationCheckResult
   308	  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
   309	
   310	  /**
   311	   * Test if a string is in a given normalization form.
   312	   * This is semantically equivalent to source.equals(normalize(source, mode)) .
   313	   *
   314	   * Unlike unorm_quickCheck(), this function returns a definitive result,
   315	   * never a "maybe".
   316	   * For NFD, NFKD, and FCD, both functions work exactly the same.
   317	   * For NFC and NFKC where quickCheck may return "maybe", this function will
   318	   * perform further tests to arrive at a true/false result.
   319	   *
   320	   * @param src        String that is to be tested if it is in a normalization format.
   321	   * @param mode       Which normalization form to test for.
   322	   * @param errorCode  ICU error code in/out parameter.
   323	   *                   Must fulfill U_SUCCESS before the function call.
   324	   * @return Boolean value indicating whether the source string is in the
   325	   *         "mode" normalization form.
   326	   *
   327	   * @see quickCheck
   328	   * @deprecated ICU 56 Use Normalizer2 instead.
   329	   */
   330	  static inline UBool
   331	  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
   332	
   333	  /**
   334	   * Test if a string is in a given normalization form; same as the other version of isNormalized
   335	   * but takes an extra options parameter like most normalization functions.
   336	   *
   337	   * @param src        String that is to be tested if it is in a normalization format.
   338	   * @param mode       Which normalization form to test for.
   339	   * @param options      the optional features to be enabled (0 for no options)
   340	   * @param errorCode  ICU error code in/out parameter.
   341	   *                   Must fulfill U_SUCCESS before the function call.
   342	   * @return Boolean value indicating whether the source string is in the
   343	   *         "mode" normalization form.
   344	   *
   345	   * @see quickCheck
   346	   * @deprecated ICU 56 Use Normalizer2 instead.
   347	   */
   348	  static UBool
   349	  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
   350	
   351	  /**
   352	   * Concatenate normalized strings, making sure that the result is normalized as well.
   353	   *
   354	   * If both the left and the right strings are in
   355	   * the normalization form according to "mode/options",
   356	   * then the result will be
   357	   *
   358	   * \code
   359	   *     dest=normalize(left+right, mode, options)
   360	   * \endcode
   361	   *
   362	   * For details see unorm_concatenate in unorm.h.
   363	   *
   364	   * @param left Left source string.
   365	   * @param right Right source string.
   366	   * @param result The output string.
   367	   * @param mode The normalization mode.
   368	   * @param options A bit set of normalization options.
   369	   * @param errorCode ICU error code in/out parameter.
   370	   *                   Must fulfill U_SUCCESS before the function call.
   371	   * @return result
   372	   *
   373	   * @see unorm_concatenate
   374	   * @see normalize
   375	   * @see unorm_next
   376	   * @see unorm_previous
   377	   *
   378	   * @deprecated ICU 56 Use Normalizer2 instead.
   379	   */
   380	  static UnicodeString &
   381	  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
   382	              UnicodeString &result,
   383	              UNormalizationMode mode, int32_t options,
   384	              UErrorCode &errorCode);
   385	#endif  /* U_HIDE_DEPRECATED_API */
   386	
   387	  /**
   388	   * Compare two strings for canonical equivalence.
   389	   * Further options include case-insensitive comparison and
   390	   * code point order (as opposed to code unit order).
   391	   *
   392	   * Canonical equivalence between two strings is defined as their normalized
   393	   * forms (NFD or NFC) being identical.
   394	   * This function compares strings incrementally instead of normalizing
   395	   * (and optionally case-folding) both strings entirely,
   396	   * improving performance significantly.
   397	   *
   398	   * Bulk normalization is only necessary if the strings do not fulfill the FCD
   399	   * conditions. Only in this case, and only if the strings are relatively long,
   400	   * is memory allocated temporarily.
   401	   * For FCD strings and short non-FCD strings there is no memory allocation.
   402	   *
   403	   * Semantically, this is equivalent to
   404	   *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
   405	   * where code point order and foldCase are all optional.
   406	   *
   407	   * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
   408	   * the case folding must be performed first, then the normalization.
   409	   *
   410	   * @param s1 First source string.
   411	   * @param s2 Second source string.
   412	   *
   413	   * @param options A bit set of options:
   414	   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   415	   *     Case-sensitive comparison in code unit order, and the input strings
   416	   *     are quick-checked for FCD.
   417	   *
   418	   *   - UNORM_INPUT_IS_FCD
   419	   *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
   420	   *     If not set, the function will quickCheck for FCD
   421	   *     and normalize if necessary.
   422	   *
   423	   *   - U_COMPARE_CODE_POINT_ORDER
   424	   *     Set to choose code point order instead of code unit order
   425	   *     (see u_strCompare for details).
   426	   *
   427	   *   - U_COMPARE_IGNORE_CASE
   428	   *     Set to compare strings case-insensitively using case folding,
   429	   *     instead of case-sensitively.
   430	   *     If set, then the following case folding options are used.
   431	   *
   432	   *   - Options as used with case-insensitive comparisons, currently:
   433	   *
   434	   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   435	   *    (see u_strCaseCompare for details)
   436	   *
   437	   *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
   438	   *
   439	   * @param errorCode ICU error code in/out parameter.
   440	   *                  Must fulfill U_SUCCESS before the function call.
   441	   * @return <0 or 0 or >0 as usual for string comparisons
   442	   *
   443	   * @see unorm_compare
   444	   * @see normalize
   445	   * @see UNORM_FCD
   446	   * @see u_strCompare
   447	   * @see u_strCaseCompare
   448	   *
   449	   * @stable ICU 2.2
   450	   */
   451	  static inline int32_t
   452	  compare(const UnicodeString &s1, const UnicodeString &s2,
   453	          uint32_t options,
   454	          UErrorCode &errorCode);
   455	
   456	#ifndef U_HIDE_DEPRECATED_API
   457	  //-------------------------------------------------------------------------
   458	  // Iteration API
   459	  //-------------------------------------------------------------------------
   460	
   461	  /**
   462	   * Return the current character in the normalized text.
   463	   * current() may need to normalize some text at getIndex().
   464	   * The getIndex() is not changed.
   465	   *
   466	   * @return the current normalized code point
   467	   * @deprecated ICU 56 Use Normalizer2 instead.
   468	   */
   469	  UChar32              current(void);
   470	
   471	  /**
   472	   * Return the first character in the normalized text.
   473	   * This is equivalent to setIndexOnly(startIndex()) followed by next().
   474	   * (Post-increment semantics.)
   475	   *
   476	   * @return the first normalized code point
   477	   * @deprecated ICU 56 Use Normalizer2 instead.
   478	   */
   479	  UChar32              first(void);
   480	
   481	  /**
   482	   * Return the last character in the normalized text.
   483	   * This is equivalent to setIndexOnly(endIndex()) followed by previous().
   484	   * (Pre-decrement semantics.)
   485	   *
   486	   * @return the last normalized code point
   487	   * @deprecated ICU 56 Use Normalizer2 instead.
   488	   */
   489	  UChar32              last(void);
   490	
   491	  /**
   492	   * Return the next character in the normalized text.
   493	   * (Post-increment semantics.)
   494	   * If the end of the text has already been reached, DONE is returned.
   495	   * The DONE value could be confused with a U+FFFF non-character code point
   496	   * in the text. If this is possible, you can test getIndex()<endIndex()
   497	   * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
   498	   * after calling next(). (Calling last() will change the iterator state!)
   499	   *
   500	   * The C API unorm_next() is more efficient and does not have this ambiguity.
   501	   *
   502	   * @return the next normalized code point
   503	   * @deprecated ICU 56 Use Normalizer2 instead.
   504	   */
   505	  UChar32              next(void);
   506	
   507	  /**
   508	   * Return the previous character in the normalized text and decrement.
   509	   * (Pre-decrement semantics.)
   510	   * If the beginning of the text has already been reached, DONE is returned.
   511	   * The DONE value could be confused with a U+FFFF non-character code point
   512	   * in the text. If this is possible, you can test
   513	   * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
   514	   * the iterator state!)
   515	   *
   516	   * The C API unorm_previous() is more efficient and does not have this ambiguity.
   517	   *
   518	   * @return the previous normalized code point
   519	   * @deprecated ICU 56 Use Normalizer2 instead.
   520	   */
   521	  UChar32              previous(void);
   522	
   523	  /**
   524	   * Set the iteration position in the input text that is being normalized,
   525	   * without any immediate normalization.
   526	   * After setIndexOnly(), getIndex() will return the same index that is
   527	   * specified here.
   528	   *
   529	   * @param index the desired index in the input text.
   530	   * @deprecated ICU 56 Use Normalizer2 instead.
   531	   */
   532	  void                 setIndexOnly(int32_t index);
   533	
   534	  /**
   535	   * Reset the index to the beginning of the text.
   536	   * This is equivalent to setIndexOnly(startIndex()).
   537	   * @deprecated ICU 56 Use Normalizer2 instead.
   538	   */
   539	  void                reset(void);
   540	
   541	  /**
   542	   * Retrieve the current iteration position in the input text that is
   543	   * being normalized.
   544	   *
   545	   * A following call to next() will return a normalized code point from
   546	   * the input text at or after this index.
   547	   *
   548	   * After a call to previous(), getIndex() will point at or before the
   549	   * position in the input text where the normalized code point
   550	   * was returned from with previous().
   551	   *
   552	   * @return the current index in the input text
   553	   * @deprecated ICU 56 Use Normalizer2 instead.
   554	   */
   555	  int32_t            getIndex(void) const;
   556	
   557	  /**
   558	   * Retrieve the index of the start of the input text. This is the begin index
   559	   * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
   560	   * over which this <code>Normalizer</code> is iterating.
   561	   *
   562	   * @return the smallest index in the input text where the Normalizer operates
   563	   * @deprecated ICU 56 Use Normalizer2 instead.
   564	   */
   565	  int32_t            startIndex(void) const;
   566	
   567	  /**
   568	   * Retrieve the index of the end of the input text. This is the end index
   569	   * of the <code>CharacterIterator</code> or the length of the string
   570	   * over which this <code>Normalizer</code> is iterating.
   571	   * This end index is exclusive, i.e., the Normalizer operates only on characters
   572	   * before this index.
   573	   *
   574	   * @return the first index in the input text where the Normalizer does not operate
   575	   * @deprecated ICU 56 Use Normalizer2 instead.
   576	   */
   577	  int32_t            endIndex(void) const;
   578	
   579	  /**
   580	   * Returns true when both iterators refer to the same character in the same
   581	   * input text.
   582	   *
   583	   * @param that a Normalizer object to compare this one to
   584	   * @return comparison result
   585	   * @deprecated ICU 56 Use Normalizer2 instead.
   586	   */
   587	  bool         operator==(const Normalizer& that) const;
   588	
   589	  /**
   590	   * Returns false when both iterators refer to the same character in the same
   591	   * input text.
   592	   *
   593	   * @param that a Normalizer object to compare this one to
   594	   * @return comparison result
   595	   * @deprecated ICU 56 Use Normalizer2 instead.
   596	   */
   597	  inline bool         operator!=(const Normalizer& that) const;
   598	
   599	  /**
   600	   * Returns a pointer to a new Normalizer that is a clone of this one.
   601	   * The caller is responsible for deleting the new clone.
   602	   * @return a pointer to a new Normalizer
   603	   * @deprecated ICU 56 Use Normalizer2 instead.
   604	   */
   605	  Normalizer*        clone() const;
   606	
   607	  /**
   608	   * Generates a hash code for this iterator.
   609	   *
   610	   * @return the hash code
   611	   * @deprecated ICU 56 Use Normalizer2 instead.
   612	   */
   613	  int32_t                hashCode(void) const;
   614	
   615	  //-------------------------------------------------------------------------
   616	  // Property access methods
   617	  //-------------------------------------------------------------------------
   618	
   619	  /**
   620	   * Set the normalization mode for this object.
   621	   * <p>
   622	   * <b>Note:</b>If the normalization mode is changed while iterating
   623	   * over a string, calls to {@link #next() } and {@link #previous() } may
   624	   * return previously buffered characters in the old normalization mode
   625	   * until the iteration is able to re-sync at the next base character.
   626	   * It is safest to call {@link #setIndexOnly }, {@link #reset() },
   627	   * {@link #setText }, {@link #first() },
   628	   * {@link #last() }, etc. after calling <code>setMode</code>.
   629	   * <p>
   630	   * @param newMode the new mode for this <code>Normalizer</code>.
   631	   * @see #getUMode
   632	   * @deprecated ICU 56 Use Normalizer2 instead.
   633	   */
   634	  void setMode(UNormalizationMode newMode);
   635	
   636	  /**
   637	   * Return the normalization mode for this object.
   638	   *
   639	   * This is an unusual name because there used to be a getMode() that
   640	   * returned a different type.
   641	   *
   642	   * @return the mode for this <code>Normalizer</code>
   643	   * @see #setMode
   644	   * @deprecated ICU 56 Use Normalizer2 instead.
   645	   */
   646	  UNormalizationMode getUMode(void) const;
   647	
   648	  /**
   649	   * Set options that affect this <code>Normalizer</code>'s operation.
   650	   * Options do not change the basic composition or decomposition operation
   651	   * that is being performed, but they control whether
   652	   * certain optional portions of the operation are done.
   653	   * Currently the only available option is obsolete.
   654	   *
   655	   * It is possible to specify multiple options that are all turned on or off.
   656	   *
   657	   * @param   option  the option(s) whose value is/are to be set.
   658	   * @param   value   the new setting for the option.  Use <code>true</code> to
   659	   *                  turn the option(s) on and <code>false</code> to turn it/them off.
   660	   *
   661	   * @see #getOption
   662	   * @deprecated ICU 56 Use Normalizer2 instead.
   663	   */
   664	  void setOption(int32_t option,
   665	         UBool value);
   666	
   667	  /**
   668	   * Determine whether an option is turned on or off.
   669	   * If multiple options are specified, then the result is true if any
   670	   * of them are set.
   671	   * <p>
   672	   * @param option the option(s) that are to be checked
   673	   * @return true if any of the option(s) are set
   674	   * @see #setOption
   675	   * @deprecated ICU 56 Use Normalizer2 instead.
   676	   */
   677	  UBool getOption(int32_t option) const;
   678	
   679	  /**
   680	   * Set the input text over which this <code>Normalizer</code> will iterate,
   681	   * taking the new text from a UnicodeString. The iteration position is set to the beginning.
   682	   *
   683	   * @param newText a UnicodeString that replaces the current input text
   684	   * @param status a UErrorCode
   685	   * @deprecated ICU 56 Use Normalizer2 instead.
   686	   */
   687	  void setText(const UnicodeString& newText,
   688	           UErrorCode &status);
   689	
   690	  /**
   691	   * Set the input text over which this <code>Normalizer</code> will iterate,
   692	   * taking the new text from a CharacterIterator. The iteration position is set to the beginning.
   693	   *
   694	   * @param newText a CharacterIterator object that replaces the current input text
   695	   * @param status a UErrorCode
   696	   * @deprecated ICU 56 Use Normalizer2 instead.
   697	   */
   698	  void setText(const CharacterIterator& newText,
   699	           UErrorCode &status);
   700	
   701	  /**
   702	   * Set the input text over which this <code>Normalizer</code> will iterate,
   703	   * taking the new text from a pointer and a length. The iteration position is set to the beginning.
   704	   *
   705	   * @param newText a string (passed as a ConstChar16Ptr) that replaces the current input text
   706	   * @param length the length of the string, or -1 if NUL-terminated
   707	   * @param status a UErrorCode
   708	   * @deprecated ICU 56 Use Normalizer2 instead.
   709	   */
   710	  void setText(ConstChar16Ptr newText,
   711	                    int32_t length,
   712	            UErrorCode &status);
   713	  /**
   714	   * Copies the input text (the text under iteration) into the UnicodeString argument.
   715	   *
   716	   * @param result Output parameter; receives a copy of the text under iteration.
   717	   * @deprecated ICU 56 Use Normalizer2 instead.
   718	   */
   719	  void            getText(UnicodeString&  result);
   720	
   721	  /**
   722	   * ICU "poor man's RTTI", returns a UClassID for this class (static; no object needed).
   723	   * @return a UClassID for this class.
   724	   * @deprecated ICU 56 Use Normalizer2 instead.
   725	   */
   726	  static UClassID U_EXPORT2 getStaticClassID();
   727	#endif  /* U_HIDE_DEPRECATED_API */
   728	
   729	#ifndef U_FORCE_HIDE_DEPRECATED_API
   730	  /**
   731	   * ICU "poor man's RTTI", returns a UClassID for the actual class (virtual override).
   732	   * @return a UClassID for the actual class.
   733	   * @deprecated ICU 56 Use Normalizer2 instead.
   734	   */
   735	  virtual UClassID getDynamicClassID() const override;
   736	#endif  // U_FORCE_HIDE_DEPRECATED_API
   737	
   738	private:
   739	  //-------------------------------------------------------------------------
   740	  // Private functions
   741	  //-------------------------------------------------------------------------
   742	
   743	  Normalizer() = delete; // default constructor is deleted (not available)
   744	  Normalizer &operator=(const Normalizer &that) = delete; // copy assignment is deleted (not available)
   745	
   746	  // Private utility methods for iteration.
   747	  // They are not documented here; see the implementation source code.
   748	  UBool nextNormalize();
   749	  UBool previousNormalize();
   750	
   751	  void    init();
   752	  void    clearBuffer(void);
   753	
   754	  //-------------------------------------------------------------------------
   755	  // Private data
   756	  //-------------------------------------------------------------------------
   757	
   758	  FilteredNormalizer2*fFilteredNorm2;  // owned if not nullptr
   759	  const Normalizer2  *fNorm2;  // not owned; may be equal to fFilteredNorm2
   760	  UNormalizationMode  fUMode;  // deprecated
   761	  int32_t             fOptions;  // NOTE(review): presumably the value managed by setOption()/getOption() - confirm
   762	
   763	  // The input text and our position in it
   764	  CharacterIterator  *text;
   765	
   766	  // The normalization buffer is the result of normalization
   767	  // of the source in the half-open range [currentIndex, nextIndex).
   768	  int32_t         currentIndex, nextIndex;
   769	
   770	  // A buffer for holding intermediate results
   771	  UnicodeString       buffer;
   772	  int32_t         bufferPos;  // NOTE(review): presumably the current position within buffer - confirm
   773	};
   774	
   775	//-------------------------------------------------------------------------
   776	// Inline implementations
   777	//-------------------------------------------------------------------------
   778	
   779	#ifndef U_HIDE_DEPRECATED_API
   780	inline bool
   781	Normalizer::operator!= (const Normalizer& other) const
   782	{ return ! operator==(other); }  // inequality is the negation of this class's operator==
   783	
   784	inline UNormalizationCheckResult  // convenience overload without the extra third argument
   785	Normalizer::quickCheck(const UnicodeString& source,
   786	                       UNormalizationMode mode,
   787	                       UErrorCode &status) {
   788	    return quickCheck(source, mode, 0, status);  // forwards to the four-argument overload, passing 0 (presumably "no options" - confirm)
   789	}
   790	
   791	inline UBool  // convenience overload without the extra third argument
   792	Normalizer::isNormalized(const UnicodeString& source,
   793	                         UNormalizationMode mode,
   794	                         UErrorCode &status) {
   795	    return isNormalized(source, mode, 0, status);  // forwards to the four-argument overload, passing 0 (presumably "no options" - confirm)
   796	}
   797	#endif  /* U_HIDE_DEPRECATED_API */
   798	
   799	inline int32_t  // thin wrapper: returns whatever unorm_compare returns
   800	Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
   801	                    uint32_t options,
   802	                    UErrorCode &errorCode) {
   803	  // No checks here: all argument checking is done in unorm_compare.
   804	  return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),  // buffer pointer + length of s1
   805	                       toUCharPtr(s2.getBuffer()), s2.length(),  // buffer pointer + length of s2
   806	                       options,
   807	                       &errorCode);  // unorm_compare takes the error code by pointer
   808	}
   809	
   810	U_NAMESPACE_END
   811	
   812	#endif /* #if !UCONFIG_NO_NORMALIZATION */
   813	
   814	#endif /* U_SHOW_CPLUSPLUS_API */
   815	
   816	#endif // NORMLZR_H