Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/unicode/uspoof.h


$ cat -n /usr/include/unicode/uspoof.h

     1	// © 2016 and later: Unicode, Inc. and others.
     2	// License & terms of use: http://www.unicode.org/copyright.html
     3	/*
     4	***************************************************************************
     5	* Copyright (C) 2008-2016, International Business Machines Corporation
     6	* and others. All Rights Reserved.
     7	***************************************************************************
     8	*   file name:  uspoof.h
     9	*   encoding:   UTF-8
    10	*   tab size:   8 (not used)
    11	*   indentation:4
    12	*
    13	*   created on: 2008Feb13
    14	*   created by: Andy Heninger
    15	*
    16	*   Unicode Spoof Detection
    17	*/
    18	
    19	#ifndef USPOOF_H
    20	#define USPOOF_H
    21	
    22	#include "unicode/ubidi.h"
    23	#include "unicode/utypes.h"
    24	#include "unicode/uset.h"
    25	#include "unicode/parseerr.h"
    26	
    27	#if !UCONFIG_NO_NORMALIZATION
    28	
    29	
    30	#if U_SHOW_CPLUSPLUS_API
    31	#include "unicode/localpointer.h"
    32	#include "unicode/unistr.h"
    33	#include "unicode/uniset.h"
    34	#endif
    35	
    36	
    37	/**
    38	 * \file
    39	 * \brief C API: Unicode Security and Spoofing Detection
    40	 *
    41	 * <p>
    42	 * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
    43	 * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
    44	 *
    45	 * <ol>
    46	 * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
    47	 * "Ηarvest", where the second string starts with the Greek capital letter Eta.</li>
    48	 * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
    49	 * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
    50	 * </ol>
    51	 *
    52	 * <p>
    53	 * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
    54	 * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
    55	 * content filters.
    56	 *
    57	 * <p>
    58	 * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
    59	 *
    60	 * <h2>Confusables</h2>
    61	 *
    62	 * <p>
    63	 * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
    64	 *
    65	 * \code{.c}
    66	 * UErrorCode status = U_ZERO_ERROR;
    67	 * UChar* str1 = (UChar*) u"Harvest";
    68	 * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
    69	 *
    70	 * USpoofChecker* sc = uspoof_open(&status);
    71	 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    72	 *
    73	 * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
    74	 * UBool result = bitmask != 0;
    75	 * // areConfusable: 1 (status: U_ZERO_ERROR)
    76	 * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
    77	 * uspoof_close(sc);
    78	 * \endcode
    79	 *
    80	 * <p>
    81	 * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
    82	 * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
    83	 * confusability test; and the following line extracts the result out of the return value. For best performance,
    84	 * the instance should be created once (e.g., upon application startup), and the efficient
    85	 * {@link uspoof_areConfusable} method can be used at runtime.
    86	 *
    87	 * If the paragraph direction used to display the strings is known, the bidi function should be used instead:
    88	 *
    89	 * \code{.c}
    90	 * UErrorCode status = U_ZERO_ERROR;
    91	 * // These strings look identical when rendered in a left-to-right context.
    92	 * // They look distinct in a right-to-left context.
    93	 * UChar* str1 = (UChar*) u"A1\u05D0";  // A1א
    94	 * UChar* str2 = (UChar*) u"A\u05D01";  // Aא1
    95	 *
    96	 * USpoofChecker* sc = uspoof_open(&status);
    97	 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    98	 *
    99	 * int32_t bitmask = uspoof_areBidiConfusable(sc, UBIDI_LTR, str1, -1, str2, -1, &status);
   100	 * UBool result = bitmask != 0;
   101	 * // areBidiConfusable: 1 (status: U_ZERO_ERROR)
   102	 * printf("areBidiConfusable: %d (status: %s)\n", result, u_errorName(status));
   103	 * uspoof_close(sc);
   104	 * \endcode
   105	 *
   106	 * <p>
   107	 * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers.  It will automatically call
   108	 * {@link uspoof_close} when the object goes out of scope:
   109	 *
   110	 * \code{.cpp}
   111	 * UErrorCode status = U_ZERO_ERROR;
   112	 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
   113	 * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
   114	 * // ...
   115	 * \endcode
   116	 *
   117	 * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
   118	 * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
   119	 * the following snippet is equivalent to the example above:
   120	 *
   121	 * \code{.c}
   122	 * UErrorCode status = U_ZERO_ERROR;
   123	 * UChar* str1 = (UChar*) u"Harvest";
   124	 * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
   125	 *
   126	 * USpoofChecker* sc = uspoof_open(&status);
   127	 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
   128	 *
   129	 * // Get skeleton 1
   130	 * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
   131	 * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
   132	 * status = U_ZERO_ERROR;
   133	 * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
   134	 *
   135	 * // Get skeleton 2
   136	 * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
   137	 * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
   138	 * status = U_ZERO_ERROR;
   139	 * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
   140	 *
   141	 * // Are the skeletons the same?
   142	 * UBool result = u_strcmp(skel1, skel2) == 0;
   143	 * // areConfusable: 1 (status: U_ZERO_ERROR)
   144	 * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
   145	 * uspoof_close(sc);
   146	 * free(skel1);
   147	 * free(skel2);
   148	 * \endcode
   149	 *
   150	 * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
   151	 * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
   152	 *
   153	 * \code{.c}
   154	 * UErrorCode status = U_ZERO_ERROR;
   155	 * #define DICTIONARY_LENGTH 2
   156	 * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
   157	 * UChar* skeletons[DICTIONARY_LENGTH];
   158	 * UChar* str = (UChar*) u"1orern";
   159	 *
   160	 * // Setup:
   161	 * USpoofChecker* sc = uspoof_open(&status);
   162	 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
   163	 * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
   164	 *     UChar* word = dictionary[i];
   165	 *     int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
   166	 *     skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
   167	 *     status = U_ZERO_ERROR;
   168	 *     uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
   169	 * }
   170	 *
   171	 * // Live Check:
   172	 * {
   173	 *     int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
   174	 *     UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
   175	 *     status = U_ZERO_ERROR;
   176	 *     uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
   177	 *     UBool result = false;
   178	 *     for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
   179	 *         result = u_strcmp(skel, skeletons[i]) == 0;
   180	 *         if (result == true) { break; }
   181	 *     }
   182	 *     // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
   183	 *     printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
   184	 *     free(skel);
   185	 * }
   186	 *
   187	 * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
   188	 *     free(skeletons[i]);
   189	 * }
   190	 * uspoof_close(sc);
   191	 * \endcode
   192	 *
   193	 * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
   194	 * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
   195	 * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
   196	 *
   197	 * <h2>Spoof Detection</h2>
   198	 *
   199	 * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
   200	 * string:
   201	 *
   202	 * \code{.c}
   203	 * UErrorCode status = U_ZERO_ERROR;
   204	 * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
   205	 *
   206	 * // Get the default set of allowable characters:
   207	 * USet* allowed = uset_openEmpty();
   208	 * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
   209	 * uset_addAll(allowed, uspoof_getInclusionSet(&status));
   210	 *
   211	 * USpoofChecker* sc = uspoof_open(&status);
   212	 * uspoof_setAllowedChars(sc, allowed, &status);
   213	 * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
   214	 *
   215	 * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
   216	 * UBool result = bitmask != 0;
   217	 * // fails checks: 1 (status: U_ZERO_ERROR)
   218	 * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
   219	 * uspoof_close(sc);
   220	 * uset_close(allowed);
   221	 * \endcode
   222	 *
   223	 * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
   224	 * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
   225	 * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
   226	 *
   227	 * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
   228	 * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
   229	 *
   230	 * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
   231	 * is available in the returned bitmask.  For complete information, use the {@link uspoof_check2} class of functions
   232	 * with a {@link USpoofCheckResult} parameter:
   233	 *
   234	 * \code{.c}
   235	 * UErrorCode status = U_ZERO_ERROR;
   236	 * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
   237	 *
   238	 * // Get the default set of allowable characters:
   239	 * USet* allowed = uset_openEmpty();
   240	 * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
   241	 * uset_addAll(allowed, uspoof_getInclusionSet(&status));
   242	 *
   243	 * USpoofChecker* sc = uspoof_open(&status);
   244	 * uspoof_setAllowedChars(sc, allowed, &status);
   245	 * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
   246	 *
   247	 * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
   248	 * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
   249	 *
   250	 * int32_t failures1 = bitmask;
   251	 * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
   252	 * assert(failures1 == failures2);
   253	 * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
   254	 * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
   255	 *
   256	 * // Cleanup:
   257	 * uspoof_close(sc);
   258	 * uset_close(allowed);
   259	 * uspoof_closeCheckResult(checkResult);
   260	 * \endcode
   261	 *
   262	 * C++ users can take advantage of a few syntactical conveniences.  The following snippet is functionally
   263	 * equivalent to the one above:
   264	 *
   265	 * \code{.cpp}
   266	 * UErrorCode status = U_ZERO_ERROR;
   267	 * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
   268	 *
   269	 * // Get the default set of allowable characters:
   270	 * UnicodeSet allowed;
   271	 * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
   272	 * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
   273	 *
   274	 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
   275	 * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
   276	 * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
   277	 *
   278	 * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
   279	 * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
   280	 *
   281	 * int32_t failures1 = bitmask;
   282	 * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
   283	 * assert(failures1 == failures2);
   284	 * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
   285	 * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
   286	 *
   287	 * // Explicit cleanup not necessary.
   288	 * \endcode
   289	 *
   290	 * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
   291	 * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
   292	 *
   293	 * <ul>
   294	 * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
   295	 * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
   296	 * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
   297	 * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
   298	 * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
   299	 * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
   300	 * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
   301	 * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
   302	 * </ul>
   303	 *
   304	 * <p>
   305	 * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
   306	 * INVISIBLE and MIXED_NUMBERS conditions, you could do:
   307	 *
   308	 * \code{.c}
   309	 * UErrorCode status = U_ZERO_ERROR;
   310	 * UChar* str = (UChar*) u"8\u09EA";  // 8 mixed with U+09EA BENGALI DIGIT FOUR
   311	 *
   312	 * USpoofChecker* sc = uspoof_open(&status);
   313	 * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
   314	 *
   315	 * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
   316	 * UBool result = bitmask != 0;
   317	 * // fails checks: 1 (status: U_ZERO_ERROR)
   318	 * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
   319	 * uspoof_close(sc);
   320	 * \endcode
   321	 *
   322	 * Here is an example in C++ showing how to compute the restriction level of a string:
   323	 *
   324	 * \code{.cpp}
   325	 * UErrorCode status = U_ZERO_ERROR;
   326	 * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
   327	 *
   328	 * // Get the default set of allowable characters:
   329	 * UnicodeSet allowed;
   330	 * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
   331	 * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
   332	 *
   333	 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
   334	 * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
   335	 * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
   336	 * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
   337	 *
   338	 * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
   339	 * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
   340	 *
   341	 * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
   342	 * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
   343	 * assert((restrictionLevel & bitmask) == restrictionLevel);
   344	 * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
   345	 * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
   346	 * \endcode
   347	 *
   348	 * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE.  Since
   349	 * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
   350	 *
   351	 * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
   352	 * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
   353	 * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
   354	 * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
   355	 * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
   356	 * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
   357	 * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
   358	 * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
   359	 * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
   360	 * scripts.
   361	 *
   362	 * <h2>Advanced bidirectional usage</h2>
   363	 * If the paragraph direction with which the identifiers will be displayed is not known, there are
   364	 * multiple options for confusable detection depending on the circumstances.
   365	 *
   366	 * <p>
   367	 * In some circumstances, the only concern is confusion between identifiers displayed with the same
   368	 * paragraph direction.
   369	 *
   370	 * <p>
   371	 * An example is the case where identifiers are usernames prefixed with the @ symbol.
   372	 * That symbol will appear to the left in a left-to-right context, and to the right in a
   373	 * right-to-left context, so that an identifier displayed in a left-to-right context can never be
   374	 * confused with an identifier displayed in a right-to-left context:
   375	 * <ul>
   376	 * <li>
   377	 * The usernames "A1א" (A one aleph) and "Aא1" (A aleph 1)
   378	 * would be considered confusable, since they both appear as \@A1א in a left-to-right context, and the
   379	 * usernames "אA_1" (aleph A underscore one) and "א1_A" (aleph one underscore A) would be considered
   380	 * confusable, since they both appear as A_1א@ in a right-to-left context.
   381	 * </li>
   382	 * <li>
   383	 * The username "Mark_" would not be considered confusable with the username "_Mark",
   384	 * even though the latter would appear as Mark_@ in a right-to-left context, and the
   385	 * former as \@Mark_ in a left-to-right context.
   386	 * </li>
   387	 * </ul>
   388	 * <p>
   389	 * In that case, the caller should check for both LTR-confusability and RTL-confusability:
   390	 *
   391	 * \code{.cpp}
   392	 * bool confusableInEitherDirection =
   393	 *     uspoof_areBidiConfusableUnicodeString(sc, UBIDI_LTR, id1, id2, &status) ||
   394	 *     uspoof_areBidiConfusableUnicodeString(sc, UBIDI_RTL, id1, id2, &status);
   395	 * \endcode
   396	 *
   397	 * If the bidiSkeleton is used, the LTR and RTL skeleta should be kept separately and compared, LTR
   398	 * with LTR and RTL with RTL.
   399	 *
   400	 * <p>
   401	 * In cases where confusability between the visual appearances of an identifier displayed in a
   402	 * left-to-right context with another identifier displayed in a right-to-left context is a concern,
   403	 * the LTR skeleton of one can be compared with the RTL skeleton of the other.  However, this
   404	 * very broad definition of confusability may have unexpected results; for instance, it treats the
   405	 * ASCII identifiers "Mark_" and "_Mark" as confusable.
   406	 *
   407	 * <h2>Additional Information</h2>
   408	 *
   409	 * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
   410	 *
   411	 * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
   412	 * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
   413	 * using the same USpoofChecker instance.
   414	 *
   415	 * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
   416	 * thread safe. Those that take a non-const USpoofChecker are not thread safe.
   417	 *
   418	 * @stable ICU 4.6
   419	 */
   420	
   421	U_CDECL_BEGIN
   422	
   423	struct USpoofChecker;
   424	/**
   425	 * @stable ICU 4.2
   426	 */
   427	typedef struct USpoofChecker USpoofChecker; /**< C typedef: lets callers write USpoofChecker without the struct keyword */
   428	
   429	struct USpoofCheckResult;
   430	/**
   431	 * @see uspoof_openCheckResult
   432	 * @stable ICU 58
   433	 */
   434	typedef struct USpoofCheckResult USpoofCheckResult; /**< C typedef: lets callers write USpoofCheckResult without the struct keyword */
   435	
   436	/**
   437	 * Enum for the kinds of checks that USpoofChecker can perform.
   438	 * These enum values are used both to select the set of checks that
   439	 * will be performed, and to report results from the check function.
   440	 *
   441	 * @stable ICU 4.2
   442	 */
   443	typedef enum USpoofChecks {
   444	    /**
   445	     * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
   446	     * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
   447	     * 4.
   448	     *
   449	     * @see uspoof_areConfusable
   450	     * @stable ICU 4.2
   451	     */
   452	    USPOOF_SINGLE_SCRIPT_CONFUSABLE =   1,
   453	
   454	    /**
   455	     * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
   456	     * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
   457	     * 39 section 4.
   458	     *
   459	     * @see uspoof_areConfusable
   460	     * @stable ICU 4.2
   461	     */
   462	    USPOOF_MIXED_SCRIPT_CONFUSABLE  =   2,
   463	
   464	    /**
   465	     * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
   466	     * that the two strings are visually confusable and that they are not from the same script but both of them are
   467	     * single-script strings, according to UTS 39 section 4.
   468	     *
   469	     * @see uspoof_areConfusable
   470	     * @stable ICU 4.2
   471	     */
   472	    USPOOF_WHOLE_SCRIPT_CONFUSABLE  =   4,
   473	
   474	    /**
   475	     * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables.  You may set
   476	     * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
   477	     * make {@link uspoof_areConfusable} return only those types of confusables.
   478	     *
   479	     * @see uspoof_areConfusable
   480	     * @see uspoof_getSkeleton
   481	     * @stable ICU 58
   482	     */
   483	    USPOOF_CONFUSABLE               =   USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
   484	
   485	#ifndef U_HIDE_DEPRECATED_API
   486	    /**
   487	      * This flag is deprecated and no longer affects the behavior of USpoofChecker.
   488	      *
   489	      * @deprecated ICU 58  Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
   490	      */
   491	    USPOOF_ANY_CASE                 =   8,
   492	#endif  /* U_HIDE_DEPRECATED_API */
   493	
   494	    /**
   495	      * Check that an identifier is no looser than the specified RestrictionLevel.
   496	      * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
   497	      *
   498	      * If USPOOF_AUX_INFO is enabled the actual restriction level of the
   499	      * identifier being tested will also be returned by uspoof_check().
   500	      *
   501	      * @see URestrictionLevel
   502	      * @see uspoof_setRestrictionLevel
   503	      * @see USPOOF_AUX_INFO
   504	      *
   505	      * @stable ICU 51
   506	      */
   507	    USPOOF_RESTRICTION_LEVEL        = 16,
   508	
   509	#ifndef U_HIDE_DEPRECATED_API
   510	    /** Check that an identifier contains only characters from a
   511	      * single script (plus chars from the common and inherited scripts.)
   512	      * Applies to checks of a single identifier only.
   513	      * @deprecated ICU 51  Use RESTRICTION_LEVEL instead.
   514	      */
   515	    USPOOF_SINGLE_SCRIPT            =  USPOOF_RESTRICTION_LEVEL,
   516	#endif  /* U_HIDE_DEPRECATED_API */
   517	
   518	    /** Check an identifier for the presence of invisible characters,
   519	      * such as zero-width spaces, or character sequences that are
   520	      * likely not to display, such as multiple occurrences of the same
   521	      * non-spacing mark.  This check does not test the input string as a whole
   522	      * for conformance to any particular syntax for identifiers.
   523	      */
   524	    USPOOF_INVISIBLE                =  32,
   525	
   526	    /** Check that an identifier contains only characters from a specified set
   527	      * of acceptable characters.  See {@link uspoof_setAllowedChars} and
   528	      * {@link uspoof_setAllowedLocales}.  Note that a string that fails this check
   529	      * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
   530	      */
   531	    USPOOF_CHAR_LIMIT               =  64,
   532	
   533	    /**
   534	     * Check that an identifier does not mix numbers from different numbering systems.
   535	     * For more information, see UTS 39 section 5.3.
   536	     *
   537	     * @stable ICU 51
   538	     */
   539	    USPOOF_MIXED_NUMBERS            = 128,
   540	
   541	    /**
   542	     * Check that an identifier does not have a combining character following a character in which that
   543	     * combining character would be hidden; for example 'i' followed by a U+0307 combining dot.
   544	     *
   545	     * More specifically, the following characters are forbidden from preceding a U+0307:
   546	     * <ul>
   547	     * <li>Those with the Soft_Dotted Unicode property (which includes 'i' and 'j')</li>
   548	     * <li>Latin lowercase letter 'l'</li>
   549	     * <li>Dotless 'i' and 'j' ('ı' and 'ȷ', U+0131 and U+0237)</li>
   550	     * <li>Any character whose confusable prototype ends with such a character
   551	     * (Soft_Dotted, 'l', 'ı', or 'ȷ')</li>
   552	     * </ul>
   553	     * In addition, combining characters are allowed between the above characters and U+0307 except those
   554	     * with combining class 0 or combining class "Above" (230, same class as U+0307).
   555	     *
   556	     * This list and the number of combining characters considered by this check may grow over time.
   557	     *
   558	     * @stable ICU 62
   559	     */
   560	    USPOOF_HIDDEN_OVERLAY            = 256,
   561	
   562	   /**
   563	     * Enable all spoof checks.
   564	     *
   565	     * @stable ICU 4.6
   566	     */
   567	    USPOOF_ALL_CHECKS               = 0xFFFF,
   568	
   569	    /**
   570	      * Enable the return of auxiliary (non-error) information in the
   571	      * upper bits of the check results value.
   572	      *
   573	      * If this "check" is not enabled, the results of {@link uspoof_check} will be
   574	      * zero when an identifier passes all of the enabled checks.
   575	      *
   576	      * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
   577	      * be zero when an identifier passes all checks.
   578	      *
   579	      * @stable ICU 51
   580	      */
   581	    USPOOF_AUX_INFO                  = 0x40000000
   582	
   583	    } USpoofChecks;
   584	
   585	
   586	    /**
   587	     * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
   588	     * for returned identifier restriction levels in check results.
   589	     *
   590	     * @stable ICU 51
   591	     *
   592	     * @see uspoof_setRestrictionLevel
   593	     * @see uspoof_check
   594	     */
   595	    typedef enum URestrictionLevel {
   596	        /**
   597	         * All characters in the string are in the identifier profile and all characters in the string are in the
   598	         * ASCII range.
   599	         *
   600	         * @stable ICU 51
   601	         */
   602	        USPOOF_ASCII = 0x10000000,
   603	        /**
   604	         * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
   605	         * the string is single-script, according to the definition in UTS 39 section 5.1.
   606	         *
   607	         * @stable ICU 53
   608	         */
   609	        USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
   610	        /**
   611	         * The string classifies as Single Script, or all characters in the string are in the identifier profile and
   612	         * the string is covered by any of the following sets of scripts, according to the definition in UTS 39
   613	         * section 5.1:
   614	         * <ul>
   615	         *   <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
   616	         *   <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
   617	         *   <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
   618	         * </ul>
   619	         * This is the default restriction in ICU.
   620	         *
   621	         * @stable ICU 51
   622	         */
   623	        USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
   624	        /**
   625	         * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
   626	         * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
   627	         * Greek, and Cherokee.
   628	         *
   629	         * @stable ICU 51
   630	         */
   631	        USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
   632	        /**
   633	         * All characters in the string are in the identifier profile.  Allow arbitrary mixtures of scripts.
   634	         *
   635	         * @stable ICU 51
   636	         */
   637	        USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
   638	        /**
   639	         * Any valid identifiers, including characters outside of the Identifier Profile.
   640	         *
   641	         * @stable ICU 51
   642	         */
   643	        USPOOF_UNRESTRICTIVE = 0x60000000,
   644	        /**
   645	         * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
   646	         *
   647	         * @stable ICU 53
   648	         */
   649	        USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
   650	#ifndef U_HIDE_INTERNAL_API
   651	        /**
   652	         * An undefined restriction level.
   653	         * @internal
   654	         */
   655	        USPOOF_UNDEFINED_RESTRICTIVE = -1
   656	#endif  /* U_HIDE_INTERNAL_API */
   657	    } URestrictionLevel;
   658	
   659	/**
   660	 *  Create a Unicode Spoof Checker, configured to perform all
   661	 *  checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
   662	 *  Note that additional checks may be added in the future,
   663	 *  resulting in changes to the default checking behavior.
   664	 *
   665	 *  @param status  The error code, set if this function encounters a problem.
   666	 *  @return        the newly created Spoof Checker
   667	 *  @stable ICU 4.2
   668	 */
   669	U_CAPI USpoofChecker * U_EXPORT2
   670	uspoof_open(UErrorCode *status);
   671	
   672	
   673	/**
   674	 * Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
   675	 * Inverse of uspoof_serialize().
   676	 * The memory containing the serialized data must remain valid and unchanged
   677	 * as long as the spoof checker, or any cloned copies of the spoof checker,
   678	 * are in use.  Ownership of the memory remains with the caller.
   679	 * The spoof checker (and any clones) must be closed prior to deleting the
   680	 * serialized data.
   681	 *
   682	 * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data
   683	 * @param length the number of bytes available at data;
   684	 *               can be more than necessary
   685	 * @param pActualLength receives the actual number of bytes at data taken up by the data;
   686	 *                      can be NULL
   687	 * @param pErrorCode ICU error code
   688	 * @return the spoof checker.
   689	 *
   690	 * @see uspoof_open
   691	 * @see uspoof_serialize
   692	 * @stable ICU 4.2
   693	 */
   694	U_CAPI USpoofChecker * U_EXPORT2
   695	uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
   696	                          UErrorCode *pErrorCode);
   697	
   698	/**
   699	  * Open a Spoof Checker from the source form of the spoof data.
   700	  * The input corresponds to the Unicode data file confusables.txt
   701	  * as described in Unicode Technical Standard #39.  The syntax of the source data
   702	  * is as described in UTS #39 for this file, and the content of
   703	  * this file is acceptable input.
   704	  *
   705	  * The character encoding of the (char *) input text is UTF-8.
   706	  *
   707	  * @param confusables a pointer to the confusable characters definitions,
   708	  *                    as found in file confusables.txt from unicode.org.
   709	  * @param confusablesLen The length of the confusables text, or -1 if the
   710	  *                    input string is zero terminated.
   711	  * @param confusablesWholeScript
   712	  *                    Deprecated in ICU 58.  No longer used.
   713	  * @param confusablesWholeScriptLen
   714	  *                    Deprecated in ICU 58.  No longer used.
   715	  * @param errType     In the event of an error in the input, indicates
   716	  *                    which of the input files contains the error.
   717	  *                    The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
   718	  *                    USPOOF_WHOLE_SCRIPT_CONFUSABLE, or
   719	  *                    zero if no errors are found.
   720	  * @param pe          In the event of an error in the input, receives the position
   721	  *                    in the input text (line, offset) of the error.
   722	  * @param status      an in/out ICU UErrorCode.  Among the possible errors is
   723	  *                    U_PARSE_ERROR, which is used to report syntax errors
   724	  *                    in the input.
   725	  * @return            A spoof checker that uses the rules from the input files.
   726	  * @stable ICU 4.2
   727	  */
   728	U_CAPI USpoofChecker * U_EXPORT2
   729	uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
   730	                      const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
   731	                      int32_t *errType, UParseError *pe, UErrorCode *status);
   732	
   733	
   734	/**
   735	  * Close a Spoof Checker, freeing any memory that was being held by
   736	  *   its implementation.
   737	  * @stable ICU 4.2
   738	  */
   739	U_CAPI void U_EXPORT2
   740	uspoof_close(USpoofChecker *sc);
   741	
   742	/**
   743	 * Clone a Spoof Checker.  The clone will be set to perform the same checks
   744	 *   as the original source.
   745	 *
   746	 * @param sc       The source USpoofChecker
   747	 * @param status   The error code, set if this function encounters a problem.
   748	 * @return         The cloned Spoof Checker.
   749	 * @stable ICU 4.2
   750	 */
   751	U_CAPI USpoofChecker * U_EXPORT2
   752	uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
   753	
   754	
   755	/**
   756	 * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
   757	 * overwrites any checks that may have already been enabled. By default, all checks are enabled.
   758	 *
   759	 * To enable specific checks and disable all others,
   760	 * OR together only the bit constants for the desired checks.
   761	 * For example, to fail strings containing characters outside of
   762	 * the set specified by {@link uspoof_setAllowedChars} and
   763	 * also strings that contain digits from mixed numbering systems:
   764	 *
   765	 * <pre>
   766	 * {@code
   767	 * uspoof_setChecks(sc, USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS, &status);
   768	 * }
   769	 * </pre>
   770	 *
   771	 * To disable specific checks and enable all others,
   772	 * start with ALL_CHECKS and "AND away" the not-desired checks.
   773	 * For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
   774	 * it is good practice to disable the CONFUSABLE check:
   775	 *
   776	 * <pre>
   777	 * {@code
   778	 * uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE, &status);
   779	 * }
   780	 * </pre>
   781	 *
   782	 * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
   783	 * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
   784	 * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
   785	 * methods.
   786	 *
   787	 * @param sc       The USpoofChecker
   788	 * @param checks         The set of checks that this spoof checker will perform.
   789	 *                 The value is a bit set, obtained by OR-ing together
   790	 *                 values from enum USpoofChecks.
   791	 * @param status   The error code, set if this function encounters a problem.
   792	 * @stable ICU 4.2
   793	 *
   794	 */
   795	U_CAPI void U_EXPORT2
   796	uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
   797	
   798	/**
   799	 * Get the set of checks that this Spoof Checker has been configured to perform.
   800	 *
   801	 * @param sc       The USpoofChecker
   802	 * @param status   The error code, set if this function encounters a problem.
   803	 * @return         The set of checks that this spoof checker will perform.
   804	 *                 The value is a bit set, obtained by OR-ing together
   805	 *                 values from enum USpoofChecks.
   806	 * @stable ICU 4.2
   807	 *
   808	 */
   809	U_CAPI int32_t U_EXPORT2
   810	uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
   811	
   812	/**
   813	 * Set the loosest restriction level allowed for strings. The default if this is not called is
   814	 * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and
   815	 * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.2 and 5.3 of UTS 39. To customize which checks are
   816	 * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
   817	 *
   818	 * @param sc       The USpoofChecker
   819	 * @param restrictionLevel The loosest restriction level allowed.
   820	 * @see URestrictionLevel
   821	 * @stable ICU 51
   822	 */
   823	U_CAPI void U_EXPORT2
   824	uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
   825	
   826	
   827	/**
   828	  * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
   829	  *
   830	  * @return The restriction level
   831	  * @see URestrictionLevel
   832	  * @stable ICU 51
   833	  */
   834	U_CAPI URestrictionLevel U_EXPORT2
   835	uspoof_getRestrictionLevel(const USpoofChecker *sc);
   836	
   837	/**
   838	 * Limit characters that are acceptable in identifiers being checked to those
   839	 * normally used with the languages associated with the specified locales.
   840	 * Any previously specified list of locales is replaced by the new settings.
   841	 *
   842	 * A set of languages is determined from the locale(s), and
   843	 * from those a set of acceptable Unicode scripts is determined.
   844	 * Characters from this set of scripts, along with characters from
   845	 * the "common" and "inherited" Unicode Script categories
   846	 * will be permitted.
   847	 *
   848	 * Supplying an empty string removes all restrictions;
   849	 * characters from any script will be allowed.
   850	 *
   851	 * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
   852	 * USpoofChecker when calling this function with a non-empty list
   853	 * of locales.
   854	 *
   855	 * The Unicode Set of characters that will be allowed is accessible
   856	 * via the uspoof_getAllowedChars() function.  uspoof_setAllowedLocales()
   857	 * will <i>replace</i> any previously applied set of allowed characters.
   858	 *
   859	 * Adjustments, such as additions or deletions of certain classes of characters,
   860	 * can be made to the result of uspoof_setAllowedLocales() by
   861	 * fetching the resulting set with uspoof_getAllowedChars(),
   862	 * manipulating it with the Unicode Set API, then resetting the
   863	 * spoof detector's limits with uspoof_setAllowedChars().
   864	 *
   865	 * @param sc           The USpoofChecker
   866	 * @param localesList  A list of locales, from which the language
   867	 *                     and associated script are extracted.  The locales
   868	 *                     are comma-separated if there is more than one.
   869	 *                     White space may not appear within an individual locale,
   870	 *                     but is ignored otherwise.
   871	 *                     The locales are syntactically like those from the
   872	 *                     HTTP Accept-Language header.
   873	 *                     If the localesList is empty, no restrictions will be placed on
   874	 *                     the allowed characters.
   875	 *
   876	 * @param status       The error code, set if this function encounters a problem.
   877	 * @stable ICU 4.2
   878	 */
   879	U_CAPI void U_EXPORT2
   880	uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
   881	
   882	/**
   883	 * Get a list of locales for the scripts that are acceptable in strings
   884	 *  to be checked.  If no limitations on scripts have been specified,
   885	 *  an empty string will be returned.
   886	 *
   887	 *  uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
   888	 *
   889	 *  The format of the returned list is the same as that supplied to
   890	 *  uspoof_setAllowedLocales(), but returned list may not be identical
   891	 *  to the originally specified string; the string may be reformatted,
   892	 *  and information other than languages from
   893	 *  the originally specified locales may be omitted.
   894	 *
   895	 * @param sc           The USpoofChecker
   896	 * @param status       The error code, set if this function encounters a problem.
   897	 * @return             A string containing a list of locales corresponding
   898	 *                     to the acceptable scripts, formatted like an
   899	 *                     HTTP Accept-Language value.
   900	 *
   901	 * @stable ICU 4.2
   902	 */
   903	U_CAPI const char * U_EXPORT2
   904	uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
   905	
   906	
   907	/**
   908	 * Limit the acceptable characters to those specified by a Unicode Set.
   909	 *   Any previously specified character limit
   910	 *   is replaced by the new settings.  This includes limits on
   911	 *   characters that were set with the uspoof_setAllowedLocales() function.
   912	 *
   913	 * The USPOOF_CHAR_LIMIT test is automatically enabled for this
   914	 * USpoofChecker by this function.
   915	 *
   916	 * @param sc       The USpoofChecker
   917	 * @param chars    A Unicode Set containing the list of
   918	 *                 characters that are permitted.  Ownership of the set
   919	 *                 remains with the caller.  The incoming set is cloned by
   920	 *                 this function, so there are no restrictions on modifying
   921	 *                 or deleting the USet after calling this function.
   922	 * @param status   The error code, set if this function encounters a problem.
   923	 * @stable ICU 4.2
   924	 */
   925	U_CAPI void U_EXPORT2
   926	uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
   927	
   928	
   929	/**
   930	 * Get a USet for the characters permitted in an identifier.
   931	 * This corresponds to the limits imposed by the Set Allowed Characters
   932	 * functions. Limitations imposed by other checks will not be
   933	 * reflected in the set returned by this function.
   934	 *
   935	 * The returned set will be frozen, meaning that it cannot be modified
   936	 * by the caller.
   937	 *
   938	 * Ownership of the returned set remains with the Spoof Detector.  The
   939	 * returned set will become invalid if the spoof detector is closed,
   940	 * or if a new set of allowed characters is specified.
   941	 *
   942	 *
   943	 * @param sc       The USpoofChecker
   944	 * @param status   The error code, set if this function encounters a problem.
   945	 * @return         A USet containing the characters that are permitted by
   946	 *                 the USPOOF_CHAR_LIMIT test.
   947	 * @stable ICU 4.2
   948	 */
   949	U_CAPI const USet * U_EXPORT2
   950	uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
   951	
   952	
   953	/**
   954	 * Check the specified string for possible security issues.
   955	 * The text to be checked will typically be an identifier of some sort.
   956	 * The set of checks to be performed is specified with uspoof_setChecks().
   957	 *
   958	 * \note
   959	 *   Consider using the newer API, {@link uspoof_check2}, instead.
   960	 *   The newer API exposes additional information from the check procedure
   961	 *   and is otherwise identical to this method.
   962	 *
   963	 * @param sc      The USpoofChecker
   964	 * @param id      The identifier to be checked for possible security issues,
   965	 *                in UTF-16 format.
   966	 * @param length  the length of the string to be checked, expressed in
   967	 *                16 bit UTF-16 code units, or -1 if the string is
   968	 *                zero terminated.
   969	 * @param position  Deprecated in ICU 51.  Always returns zero.
   970	 *                Originally, an out parameter for the index of the first
   971	 *                string position that failed a check.
   972	 *                This parameter may be NULL.
   973	 * @param status  The error code, set if an error occurred while attempting to
   974	 *                perform the check.
   975	 *                Spoofing or security issues detected with the input string are
   976	 *                not reported here, but through the function's return value.
   977	 * @return        An integer value with bits set for any potential security
   978	 *                or spoofing issues detected.  The bits are defined by
   979	 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   980	 *                will be zero if the input string passes all of the
   981	 *                enabled checks.
   982	 * @see uspoof_check2
   983	 * @stable ICU 4.2
   984	 */
   985	U_CAPI int32_t U_EXPORT2
   986	uspoof_check(const USpoofChecker *sc,
   987	                         const UChar *id, int32_t length,
   988	                         int32_t *position,
   989	                         UErrorCode *status);
   990	
   991	
   992	/**
   993	 * Check the specified string for possible security issues.
   994	 * The text to be checked will typically be an identifier of some sort.
   995	 * The set of checks to be performed is specified with uspoof_setChecks().
   996	 *
   997	 * \note
   998	 *   Consider using the newer API, {@link uspoof_check2UTF8}, instead.
   999	 *   The newer API exposes additional information from the check procedure
  1000	 *   and is otherwise identical to this method.
  1001	 *
  1002	 * @param sc      The USpoofChecker
  1003	 * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
  1004	 * @param length  the length of the string to be checked, or -1 if the string is
  1005	 *                zero terminated.
  1006	 * @param position  Deprecated in ICU 51.  Always returns zero.
  1007	 *                Originally, an out parameter for the index of the first
  1008	 *                string position that failed a check.
  1009	 *                This parameter may be NULL.
  1010	 * @param status  The error code, set if an error occurred while attempting to
  1011	 *                perform the check.
  1012	 *                Spoofing or security issues detected with the input string are
  1013	 *                not reported here, but through the function's return value.
  1014	 *                If the input contains invalid UTF-8 sequences,
  1015	 *                a status of U_INVALID_CHAR_FOUND will be returned.
  1016	 * @return        An integer value with bits set for any potential security
  1017	 *                or spoofing issues detected.  The bits are defined by
  1018	 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
  1019	 *                will be zero if the input string passes all of the
  1020	 *                enabled checks.
  1021	 * @see uspoof_check2UTF8
  1022	 * @stable ICU 4.2
  1023	 */
  1024	U_CAPI int32_t U_EXPORT2
  1025	uspoof_checkUTF8(const USpoofChecker *sc,
  1026	                 const char *id, int32_t length,
  1027	                 int32_t *position,
  1028	                 UErrorCode *status);
  1029	
  1030	
  1031	/**
  1032	 * Check the specified string for possible security issues.
  1033	 * The text to be checked will typically be an identifier of some sort.
  1034	 * The set of checks to be performed is specified with uspoof_setChecks().
  1035	 *
  1036	 * @param sc      The USpoofChecker
  1037	 * @param id      The identifier to be checked for possible security issues,
  1038	 *                in UTF-16 format.
  1039	 * @param length  the length of the string to be checked, or -1 if the string is
  1040	 *                zero terminated.
  1041	 * @param checkResult  An instance of USpoofCheckResult to be filled with
  1042	 *                details about the identifier.  Can be NULL.
  1043	 * @param status  The error code, set if an error occurred while attempting to
  1044	 *                perform the check.
  1045	 *                Spoofing or security issues detected with the input string are
  1046	 *                not reported here, but through the function's return value.
  1047	 * @return        An integer value with bits set for any potential security
  1048	 *                or spoofing issues detected.  The bits are defined by
  1049	 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
  1050	 *                will be zero if the input string passes all of the
  1051	 *                enabled checks.  Any information in this bitmask will be
  1052	 *                consistent with the information saved in the optional
  1053	 *                checkResult parameter.
  1054	 * @see uspoof_openCheckResult
  1055	 * @see uspoof_check2UTF8
  1056	 * @see uspoof_check2UnicodeString
  1057	 * @stable ICU 58
  1058	 */
  1059	U_CAPI int32_t U_EXPORT2
  1060	uspoof_check2(const USpoofChecker *sc,
  1061	    const UChar* id, int32_t length,
  1062	    USpoofCheckResult* checkResult,
  1063	    UErrorCode *status);
  1064	
  1065	/**
  1066	 * Check the specified string for possible security issues.
  1067	 * The text to be checked will typically be an identifier of some sort.
  1068	 * The set of checks to be performed is specified with uspoof_setChecks().
  1069	 *
  1070	 * This version of {@link uspoof_check} accepts a USpoofCheckResult, which
  1071	 * returns additional information about the identifier.  For more
  1072	 * information, see {@link uspoof_openCheckResult}.
  1073	 *
  1074	 * @param sc      The USpoofChecker
  1075	 * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
  1076	 * @param length  the length of the string to be checked, or -1 if the string is
  1077	 *                zero terminated.
  1078	 * @param checkResult  An instance of USpoofCheckResult to be filled with
  1079	 *                details about the identifier.  Can be NULL.
  1080	 * @param status  The error code, set if an error occurred while attempting to
  1081	 *                perform the check.
  1082	 *                Spoofing or security issues detected with the input string are
  1083	 *                not reported here, but through the function's return value.
  1084	 * @return        An integer value with bits set for any potential security
  1085	 *                or spoofing issues detected.  The bits are defined by
  1086	 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
  1087	 *                will be zero if the input string passes all of the
  1088	 *                enabled checks.  Any information in this bitmask will be
  1089	 *                consistent with the information saved in the optional
  1090	 *                checkResult parameter.
  1091	 * @see uspoof_openCheckResult
  1092	 * @see uspoof_check2
  1093	 * @see uspoof_check2UnicodeString
  1094	 * @stable ICU 58
  1095	 */
  1096	U_CAPI int32_t U_EXPORT2
  1097	uspoof_check2UTF8(const USpoofChecker *sc,
  1098	    const char *id, int32_t length,
  1099	    USpoofCheckResult* checkResult,
  1100	    UErrorCode *status);
  1101	
  1102	/**
  1103	 * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
  1104	 * information about the identifier.  Information includes:
  1105	 * <ul>
  1106	 *   <li>A bitmask of the checks that failed</li>
  1107	 *   <li>The identifier's restriction level (UTS 39 section 5.2)</li>
  1108	 *   <li>The set of numerics in the string (UTS 39 section 5.3)</li>
  1109	 * </ul>
  1110	 * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
  1111	 * of {@link uspoof_check2}.
  1112	 *
  1113	 * @param status  The error code, set if this function encounters a problem.
  1114	 * @return        the newly created USpoofCheckResult
  1115	 * @see uspoof_check2
  1116	 * @see uspoof_check2UTF8
  1117	 * @see uspoof_check2UnicodeString
  1118	 * @stable ICU 58
  1119	 */
  1120	U_CAPI USpoofCheckResult* U_EXPORT2
  1121	uspoof_openCheckResult(UErrorCode *status);
  1122	
  1123	/**
  1124	 * Close a USpoofCheckResult, freeing any memory that was being held by
  1125	 *   its implementation.
  1126	 *
  1127	 * @param checkResult  The instance of USpoofCheckResult to close
  1128	 * @stable ICU 58
  1129	 */
  1130	U_CAPI void U_EXPORT2
  1131	uspoof_closeCheckResult(USpoofCheckResult *checkResult);
  1132	
  1133	/**
  1134	 * Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests
  1135	 * in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.
  1136	 *
  1137	 * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
  1138	 * @param status       The error code, set if an error occurred.
  1139	 * @return        An integer value with bits set for any potential security
  1140	 *                or spoofing issues detected.  The bits are defined by
  1141	 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
  1142	 *                will be zero if the input string passes all of the
  1143	 *                enabled checks.
  1144	 * @see uspoof_setChecks
  1145	 * @stable ICU 58
  1146	 */
  1147	U_CAPI int32_t U_EXPORT2
  1148	uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
  1149	
  1150	/**
  1151	 * Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check
  1152	 * was enabled; otherwise, undefined.
  1153	 *
  1154	 * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
  1155	 * @param status       The error code, set if an error occurred.
  1156	 * @return             The restriction level contained in the USpoofCheckResult
  1157	 * @see uspoof_setRestrictionLevel
  1158	 * @stable ICU 58
  1159	 */
  1160	U_CAPI URestrictionLevel U_EXPORT2
  1161	uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
  1162	
  1163	/**
  1164	 * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
  1165	 * otherwise, undefined.  The set will contain the zero digit from each decimal number system found
  1166	 * in the input string.  Ownership of the returned USet remains with the USpoofCheckResult.
  1167	 * The USet will be freed when {@link uspoof_closeCheckResult} is called.
  1168	 *
  1169	 * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
  1170	 * @param status       The error code, set if an error occurred.
  1171	 * @return             The set of numerics contained in the USpoofCheckResult
  1172	 * @stable ICU 58
  1173	 */
  1174	U_CAPI const USet* U_EXPORT2
  1175	uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
  1176	
  1177	
  1178	/**
  1179	 * Check whether two specified strings are visually confusable.
  1180	 *
  1181	 * If the strings are confusable, the return value will be nonzero, as long as
  1182	 * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
  1183	 *
  1184	 * The bits in the return value correspond to flags for each of the classes of
  1185	 * confusables applicable to the two input strings.  According to UTS 39
  1186	 * section 4, the possible flags are:
  1187	 *
  1188	 * <ul>
  1189	 *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
  1190	 *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
  1191	 *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
  1192	 * </ul>
  1193	 *
  1194	 * If one or more of the above flags were not listed in uspoof_setChecks(), this
  1195	 * function will never report that class of confusable.  The check
  1196	 * {@link USPOOF_CONFUSABLE} enables all three flags.
  1197	 *
  1198	 *
  1199	 * @param sc      The USpoofChecker
  1200	 * @param id1     The first of the two identifiers to be compared for
  1201	 *                confusability.  The strings are in UTF-16 format.
  1202	 * @param length1 the length of the first identifier, expressed in
  1203	 *                16 bit UTF-16 code units, or -1 if the string is
  1204	 *                nul terminated.
  1205	 * @param id2     The second of the two identifiers to be compared for
  1206	 *                confusability.  The identifiers are in UTF-16 format.
  1207	 * @param length2 The length of the second identifier, expressed in
  1208	 *                16 bit UTF-16 code units, or -1 if the string is
  1209	 *                nul terminated.
  1210	 * @param status  The error code, set if an error occurred while attempting to
  1211	 *                perform the check.
  1212	 *                Confusability of the identifiers is not reported here,
  1213	 *                but through this function's return value.
  1214	 * @return        An integer value with bit(s) set corresponding to
  1215	 *                the type of confusability found, as defined by
  1216	 *                enum USpoofChecks.  Zero is returned if the identifiers
  1217	 *                are not confusable.
  1218	 *
  1219	 * @stable ICU 4.2
  1220	 */
  1221	U_CAPI int32_t U_EXPORT2
  1222	uspoof_areConfusable(const USpoofChecker *sc,
  1223	                     const UChar *id1, int32_t length1,
  1224	                     const UChar *id2, int32_t length2,
  1225	                     UErrorCode *status);
  1226	
  1227	#ifndef U_HIDE_DRAFT_API
  1228	/**
  1229	 * Check whether two specified strings are visually confusable when
  1230	 * displayed in a context with the given paragraph direction.
  1231	 *
  1232	 * If the strings are confusable, the return value will be nonzero, as long as
  1233	 * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
  1234	 *
  1235	 * The bits in the return value correspond to flags for each of the classes of
  1236	 * confusables applicable to the two input strings.  According to UTS 39
  1237	 * section 4, the possible flags are:
  1238	 *
  1239	 * <ul>
  1240	 *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
  1241	 *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
  1242	 *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
  1243	 * </ul>
  1244	 *
  1245	 * If one or more of the above flags were not listed in uspoof_setChecks(), this
  1246	 * function will never report that class of confusable.  The check
  1247	 * {@link USPOOF_CONFUSABLE} enables all three flags.
  1248	 *
  1249	 *
  1250	 * @param sc      The USpoofChecker
  1251	 * @param direction The paragraph direction with which the identifiers are
  1252	 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
  1253	 * @param id1     The first of the two identifiers to be compared for
  1254	 *                confusability.  The strings are in UTF-16 format.
  1255	 * @param length1 the length of the first identifier, expressed in
  1256	 *                16 bit UTF-16 code units, or -1 if the string is
  1257	 *                nul terminated.
  1258	 * @param id2     The second of the two identifiers to be compared for
  1259	 *                confusability.  The identifiers are in UTF-16 format.
  1260	 * @param length2 The length of the second identifier, expressed in
  1261	 *                16 bit UTF-16 code units, or -1 if the string is
  1262	 *                nul terminated.
  1263	 * @param status  The error code, set if an error occurred while attempting to
  1264	 *                perform the check.
  1265	 *                Confusability of the identifiers is not reported here,
  1266	 *                but through this function's return value.
  1267	 * @return        An integer value with bit(s) set corresponding to
  1268	 *                the type of confusability found, as defined by
  1269	 *                enum USpoofChecks.  Zero is returned if the identifiers
  1270	 *                are not confusable.
  1271	 *
  1272	 * @draft ICU 74
  1273	 */
  1274	U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
  1275	                                                  const UChar *id1, int32_t length1,
  1276	                                                  const UChar *id2, int32_t length2,
  1277	                                                  UErrorCode *status);
  1278	#endif /* U_HIDE_DRAFT_API */
  1279	
  1280	/**
  1281	 * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
  1282	 *
  1283	 * @param sc      The USpoofChecker
  1284	 * @param id1     The first of the two identifiers to be compared for
  1285	 *                confusability.  The strings are in UTF-8 format.
  1286	 * @param length1 the length of the first identifier, in bytes, or -1
  1287	 *                if the string is nul terminated.
  1288	 * @param id2     The second of the two identifiers to be compared for
  1289	 *                confusability.  The strings are in UTF-8 format.
  1290	 * @param length2 The length of the second string in bytes, or -1
  1291	 *                if the string is nul terminated.
  1292	 * @param status  The error code, set if an error occurred while attempting to
  1293	 *                perform the check.
  1294	 *                Confusability of the strings is not reported here,
  1295	 *                but through this function's return value.
  1296	 * @return        An integer value with bit(s) set corresponding to
  1297	 *                the type of confusability found, as defined by
  1298	 *                enum USpoofChecks.  Zero is returned if the strings
  1299	 *                are not confusable.
  1300	 *
  1301	 * @stable ICU 4.2
  1302	 *
  1303	 * @see uspoof_areConfusable
  1304	 */
  1305	U_CAPI int32_t U_EXPORT2
  1306	uspoof_areConfusableUTF8(const USpoofChecker *sc,
  1307	                         const char *id1, int32_t length1,
  1308	                         const char *id2, int32_t length2,
  1309	                         UErrorCode *status);
  1310	
  1311	#ifndef U_HIDE_DRAFT_API
  1312	/**
  1313	 * A version of {@link uspoof_areBidiConfusable} accepting strings in UTF-8 format.
  1314	 *
  1315	 * @param sc      The USpoofChecker
  1316	 * @param direction The paragraph direction with which the identifiers are
  1317	 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
  1318	 * @param id1     The first of the two identifiers to be compared for
  1319	 *                confusability.  The strings are in UTF-8 format.
  1320	 * @param length1 the length of the first identifier, in bytes, or -1
  1321	 *                if the string is nul terminated.
  1322	 * @param id2     The second of the two identifiers to be compared for
  1323	 *                confusability.  The strings are in UTF-8 format.
  1324	 * @param length2 The length of the second string in bytes, or -1
  1325	 *                if the string is nul terminated.
  1326	 * @param status  The error code, set if an error occurred while attempting to
  1327	 *                perform the check.
  1328	 *                Confusability of the strings is not reported here,
  1329	 *                but through this function's return value.
  1330	 * @return        An integer value with bit(s) set corresponding to
  1331	 *                the type of confusability found, as defined by
  1332	 *                enum USpoofChecks.  Zero is returned if the strings
  1333	 *                are not confusable.
  1334	 *
  1335	 * @draft ICU 74
  1336	 *
  1337	 * @see uspoof_areBidiConfusable
  1338	 */
  1339	U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
  1340	                                                      const char *id1, int32_t length1,
  1341	                                                      const char *id2, int32_t length2,
  1342	                                                      UErrorCode *status);
  1343	#endif /* U_HIDE_DRAFT_API */
  1344	
  1345	/**
  1346	 *  Get the "skeleton" for an identifier.
  1347	 *  Skeletons are a transformation of the input identifier;
  1348	 *  Two identifiers are confusable if their skeletons are identical.
  1349	 *  See Unicode Technical Standard #39 for additional information.
  1350	 *
  1351	 *  Using skeletons directly makes it possible to quickly check
  1352	 *  whether an identifier is confusable with any of some large
  1353	 *  set of existing identifiers, by creating an efficiently
  1354	 *  searchable collection of the skeletons.
  1355	 *
  1356	 * @param sc      The USpoofChecker
  1357	 * @param type    Deprecated in ICU 58.  You may pass any number.
  1358	 *                Originally, controlled which of the Unicode confusable data
  1359	 *                tables to use.
  1360	 * @param id      The input identifier whose skeleton will be computed.
  1361	 * @param length  The length of the input identifier, expressed in 16 bit
  1362	 *                UTF-16 code units, or -1 if the string is zero terminated.
  1363	 * @param dest    The output buffer, to receive the skeleton string.
  1364	 * @param destCapacity  The length of the output buffer, in 16 bit units.
  1365	 *                The destCapacity may be zero, in which case the function will
  1366	 *                return the actual length of the skeleton.
  1367	 * @param status  The error code, set if an error occurred while attempting to
  1368	 *                perform the check.
  1369	 * @return        The length of the skeleton string.  The returned length
  1370	 *                is always that of the complete skeleton, even when the
  1371	 *                supplied buffer is too small (or of zero length)
  1372	 *
  1373	 * @stable ICU 4.2
  1374	 * @see uspoof_areConfusable
  1375	 */
  1376	U_CAPI int32_t U_EXPORT2
  1377	uspoof_getSkeleton(const USpoofChecker *sc,
  1378	                   uint32_t type,
  1379	                   const UChar *id,  int32_t length,
  1380	                   UChar *dest, int32_t destCapacity,
  1381	                   UErrorCode *status);
  1382	
  1383	#ifndef U_HIDE_DRAFT_API
  1384	/**
  1385	 *  Get the "bidiSkeleton" for an identifier and a direction.
  1386	 *  Skeletons are a transformation of the input identifier;
  1387	 *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
  1388	 *  they are RTL-confusable if their RTL bidiSkeletons are identical.
  1389	 *  See Unicode Technical Standard #39 for additional information:
  1390	 *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
  1391	 *
  1392	 *  Using skeletons directly makes it possible to quickly check
  1393	 *  whether an identifier is confusable with any of some large
  1394	 *  set of existing identifiers, by creating an efficiently
  1395	 *  searchable collection of the skeletons.
  1396	 *
  1397	 * @param sc      The USpoofChecker.
  1398	 * @param direction The context direction with which the identifier will be
  1399	 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
  1400	 * @param id      The input identifier whose skeleton will be computed.
  1401	 * @param length  The length of the input identifier, expressed in 16 bit
  1402	 *                UTF-16 code units, or -1 if the string is zero terminated.
  1403	 * @param dest    The output buffer, to receive the skeleton string.
  1404	 * @param destCapacity  The length of the output buffer, in 16 bit units.
  1405	 *                The destCapacity may be zero, in which case the function will
  1406	 *                return the actual length of the skeleton.
  1407	 * @param status  The error code, set if an error occurred while attempting to
  1408	 *                perform the check.
  1409	 * @return        The length of the skeleton string.  The returned length
  1410	 *                is always that of the complete skeleton, even when the
  1411	 *                supplied buffer is too small (or of zero length)
  1412	 *
  1413	 * @draft ICU 74
  1414	 * @see uspoof_areBidiConfusable
  1415	 */
  1416	U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc,
  1417	                                                UBiDiDirection direction,
  1418	                                                const UChar *id, int32_t length,
  1419	                                                UChar *dest, int32_t destCapacity, UErrorCode *status);
  1420	#endif /* U_HIDE_DRAFT_API */
  1421	
  1422	/**
  1423	 *  Get the "skeleton" for an identifier.
  1424	 *  Skeletons are a transformation of the input identifier;
  1425	 *  Two identifiers are confusable if their skeletons are identical.
  1426	 *  See Unicode Technical Standard #39 for additional information.
  1427	 *
  1428	 *  Using skeletons directly makes it possible to quickly check
  1429	 *  whether an identifier is confusable with any of some large
  1430	 *  set of existing identifiers, by creating an efficiently
  1431	 *  searchable collection of the skeletons.
  1432	 *
  1433	 * @param sc      The USpoofChecker
  1434	 * @param type    Deprecated in ICU 58.  You may pass any number.
  1435	 *                Originally, controlled which of the Unicode confusable data
  1436	 *                tables to use.
  1437	 * @param id      The UTF-8 format identifier whose skeleton will be computed.
  1438	 * @param length  The length of the input string, in bytes,
  1439	 *                or -1 if the string is zero terminated.
  1440	 * @param dest    The output buffer, to receive the skeleton string.
  1441	 * @param destCapacity  The length of the output buffer, in bytes.
  1442	 *                The destCapacity may be zero, in which case the function will
  1443	 *                return the actual length of the skeleton.
  1444	 * @param status  The error code, set if an error occurred while attempting to
  1445	 *                perform the check.  Possible Errors include U_INVALID_CHAR_FOUND
  1446	 *                   for invalid UTF-8 sequences, and
  1447	 *                   U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
  1448	 *                   to hold the complete skeleton.
  1449	 * @return        The length of the skeleton string, in bytes.  The returned length
  1450	 *                is always that of the complete skeleton, even when the
  1451	 *                supplied buffer is too small (or of zero length)
  1452	 *
  1453	 * @stable ICU 4.2
  1454	 */
  1455	U_CAPI int32_t U_EXPORT2
  1456	uspoof_getSkeletonUTF8(const USpoofChecker *sc,
  1457	                       uint32_t type,
  1458	                       const char *id,  int32_t length,
  1459	                       char *dest, int32_t destCapacity,
  1460	                       UErrorCode *status);
  1461	
  1462	#ifndef U_HIDE_DRAFT_API
  1463	/**
  1464	 *  Get the "bidiSkeleton" for an identifier and a direction.
  1465	 *  Skeletons are a transformation of the input identifier;
  1466	 *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
  1467	 *  they are RTL-confusable if their RTL bidiSkeletons are identical.
  1468	 *  See Unicode Technical Standard #39 for additional information:
  1469	 *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
  1470	 *
  1471	 *  Using skeletons directly makes it possible to quickly check
  1472	 *  whether an identifier is confusable with any of some large
  1473	 *  set of existing identifiers, by creating an efficiently
  1474	 *  searchable collection of the skeletons.
  1475	 *
  1476	 * @param sc      The USpoofChecker
  1477	 * @param direction The context direction with which the identifier will be
  1478	 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
  1479	 * @param id      The UTF-8 format identifier whose skeleton will be computed.
  1480	 * @param length  The length of the input string, in bytes,
  1481	 *                or -1 if the string is zero terminated.
  1482	 * @param dest    The output buffer, to receive the skeleton string.
  1483	 * @param destCapacity  The length of the output buffer, in bytes.
  1484	 *                The destCapacity may be zero, in which case the function will
  1485	 *                return the actual length of the skeleton.
  1486	 * @param status  The error code, set if an error occurred while attempting to
  1487	 *                perform the check.  Possible Errors include U_INVALID_CHAR_FOUND
  1488	 *                for invalid UTF-8 sequences, and
  1489	 *                U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
  1490	 *                to hold the complete skeleton.
  1491	 * @return        The length of the skeleton string, in bytes.  The returned length
  1492	 *                is always that of the complete skeleton, even when the
  1493	 *                supplied buffer is too small (or of zero length)
  1494	 *
  1495	 * @draft ICU 74
  1496	 */
  1497	U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
  1498	                                                    const char *id, int32_t length, char *dest,
  1499	                                                    int32_t destCapacity, UErrorCode *status);
  1500	#endif /* U_HIDE_DRAFT_API */
  1501	
  1502	/**
  1503	  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
  1504	  * in http://unicode.org/Public/security/latest/xidmodifications.txt
  1505	  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
  1506	  *
  1507	  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
  1508	  * be deleted by the caller.
  1509	  *
  1510	  * @param status The error code, set if a problem occurs while creating the set.
  1511	  *
  1512	  * @stable ICU 51
  1513	  */
  1514	U_CAPI const USet * U_EXPORT2
  1515	uspoof_getInclusionSet(UErrorCode *status);
  1516	
  1517	/**
  1518	  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
  1519	  * in http://unicode.org/Public/security/latest/xidmodifications.txt
  1520	  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
  1521	  *
  1522	  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
  1523	  * be deleted by the caller.
  1524	  *
  1525	  * @param status The error code, set if a problem occurs while creating the set.
  1526	  *
  1527	  * @stable ICU 51
  1528	  */
  1529	U_CAPI const USet * U_EXPORT2
  1530	uspoof_getRecommendedSet(UErrorCode *status);
  1531	
  1532	/**
  1533	 * Serialize the data for a spoof detector into a chunk of memory.
  1534	 * The flattened spoof detection tables can later be used to efficiently
  1535	 * instantiate a new Spoof Detector.
  1536	 *
  1537	 * The serialized spoof checker includes only the data compiled from the
  1538	 * Unicode data tables by uspoof_openFromSource(); it does not
  1539	 * include any other state or configuration that may have been set.
  1540	 *
  1541	 * @param sc   the Spoof Detector whose data is to be serialized.
  1542	 * @param data a pointer to 32-bit-aligned memory to be filled with the data,
  1543	 *             can be NULL if capacity==0
  1544	 * @param capacity the number of bytes available at data,
  1545	 *                 or 0 for preflighting
  1546	 * @param status an in/out ICU UErrorCode; possible errors include:
  1547	 * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
  1548	 * - U_ILLEGAL_ARGUMENT_ERROR  the data or capacity parameters are bad
  1549	 * @return the number of bytes written or needed for the spoof data
  1550	 *
  1551	 * @see uspoof_openFromSerialized()
  1552	 * @stable ICU 4.2
  1553	 */
  1554	U_CAPI int32_t U_EXPORT2
  1555	uspoof_serialize(USpoofChecker *sc,
  1556	                 void *data, int32_t capacity,
  1557	                 UErrorCode *status);
  1558	
  1559	U_CDECL_END
  1560	
  1561	#if U_SHOW_CPLUSPLUS_API
  1562	
  1563	U_NAMESPACE_BEGIN
  1564	
  1565	/**
  1566	 * \class LocalUSpoofCheckerPointer
  1567	 * "Smart pointer" class, closes a USpoofChecker via uspoof_close().
  1568	 * For most methods see the LocalPointerBase base class.
  1569	 *
  1570	 * @see LocalPointerBase
  1571	 * @see LocalPointer
  1572	 * @stable ICU 4.4
  1573	 */
  1574	/**
  1575	 * \cond
  1576	 * Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER.
  1577	 *       For now, suppress with a Doxygen cond
  1578	 */
  1579	U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close);
  1580	/** \endcond */
  1581	
  1582	/**
  1583	 * \class LocalUSpoofCheckResultPointer
  1584	 * "Smart pointer" class, closes a USpoofCheckResult via `uspoof_closeCheckResult()`.
  1585	 * For most methods see the LocalPointerBase base class.
  1586	 *
  1587	 * @see LocalPointerBase
  1588	 * @see LocalPointer
  1589	 * @stable ICU 58
  1590	 */
  1591	
  1592	/**
  1593	 * \cond
  1594	 * Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER.
  1595	 *       For now, suppress with a Doxygen cond
  1596	 */
  1597	U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
  1598	/** \endcond */
  1599	
  1600	U_NAMESPACE_END
  1601	
  1602	/**
  1603	 * Limit the acceptable characters to those specified by a Unicode Set.
  1604	 *   Any previously specified character limit
  1605	 *   is replaced by the new settings.  This includes limits on
  1606	 *   characters that were set with the uspoof_setAllowedLocales() function.
  1607	 *
  1608	 * The USPOOF_CHAR_LIMIT test is automatically enabled for this
  1609	 * USpoofChecker by this function.
  1610	 *
  1611	 * @param sc       The USpoofChecker
  1612	 * @param chars    A Unicode Set containing the list of
  1613	 *                 characters that are permitted.  Ownership of the set
  1614	 *                 remains with the caller.  The incoming set is cloned by
  1615	 *                 this function, so there are no restrictions on modifying
  1616	 *                 or deleting the UnicodeSet after calling this function.
  1617	 * @param status   The error code, set if this function encounters a problem.
  1618	 * @stable ICU 4.2
  1619	 */
  1620	U_CAPI void U_EXPORT2
  1621	uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);
  1622	
  1623	
  1624	/**
  1625	 * Get a UnicodeSet for the characters permitted in an identifier.
  1626	 * This corresponds to the limits imposed by the Set Allowed Characters /
  1627	 * UnicodeSet functions. Limitations imposed by other checks will not be
  1628	 * reflected in the set returned by this function.
  1629	 *
  1630	 * The returned set will be frozen, meaning that it cannot be modified
  1631	 * by the caller.
  1632	 *
  1633	 * Ownership of the returned set remains with the Spoof Detector.  The
  1634	 * returned set will become invalid if the spoof detector is closed,
  1635	 * or if a new set of allowed characters is specified.
  1636	 *
  1637	 *
  1638	 * @param sc       The USpoofChecker
  1639	 * @param status   The error code, set if this function encounters a problem.
  1640	 * @return         A UnicodeSet containing the characters that are permitted by
  1641	 *                 the USPOOF_CHAR_LIMIT test.
  1642	 * @stable ICU 4.2
  1643	 */
  1644	U_CAPI const icu::UnicodeSet * U_EXPORT2
  1645	uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
  1646	
  1647	/**
  1648	 * Check the specified string for possible security issues.
  1649	 * The text to be checked will typically be an identifier of some sort.
  1650	 * The set of checks to be performed is specified with uspoof_setChecks().
  1651	 *
  1652	 * \note
  1653	 *   Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
  1654	 *   The newer API exposes additional information from the check procedure
  1655	 *   and is otherwise identical to this method.
  1656	 *
  1657	 * @param sc      The USpoofChecker
  1658	 * @param id      An identifier to be checked for possible security issues.
  1659	 * @param position  Deprecated in ICU 51.  Always returns zero.
  1660	 *                Originally, an out parameter for the index of the first
  1661	 *                string position that failed a check.
  1662	 *                This parameter may be nullptr.
  1663	 * @param status  The error code, set if an error occurred while attempting to
  1664	 *                perform the check.
  1665	 *                Spoofing or security issues detected with the input string are
  1666	 *                not reported here, but through the function's return value.
  1667	 * @return        An integer value with bits set for any potential security
  1668	 *                or spoofing issues detected.  The bits are defined by
  1669	 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
  1670	 *                will be zero if the input string passes all of the
  1671	 *                enabled checks.
  1672	 * @see uspoof_check2UnicodeString
  1673	 * @stable ICU 4.2
  1674	 */
  1675	U_CAPI int32_t U_EXPORT2
  1676	uspoof_checkUnicodeString(const USpoofChecker *sc,
  1677	                          const icu::UnicodeString &id,
  1678	                          int32_t *position,
  1679	                          UErrorCode *status);
  1680	
  1681	/**
  1682	 * Check the specified string for possible security issues.
  1683	 * The text to be checked will typically be an identifier of some sort.
  1684	 * The set of checks to be performed is specified with uspoof_setChecks().
  1685	 *
  1686	 * @param sc      The USpoofChecker
  1687	 * @param id      An identifier to be checked for possible security issues.
  1688	 * @param checkResult  An instance of USpoofCheckResult to be filled with
  1689	 *                details about the identifier.  Can be nullptr.
  1690	 * @param status  The error code, set if an error occurred while attempting to
  1691	 *                perform the check.
  1692	 *                Spoofing or security issues detected with the input string are
  1693	 *                not reported here, but through the function's return value.
  1694	 * @return        An integer value with bits set for any potential security
  1695	 *                or spoofing issues detected.  The bits are defined by
  1696	 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
  1697	 *                will be zero if the input string passes all of the
  1698	 *                enabled checks.  Any information in this bitmask will be
  1699	 *                consistent with the information saved in the optional
  1700	 *                checkResult parameter.
  1701	 * @see uspoof_openCheckResult
  1702	 * @see uspoof_check2
  1703	 * @see uspoof_check2UTF8
  1704	 * @stable ICU 58
  1705	 */
  1706	U_CAPI int32_t U_EXPORT2
  1707	uspoof_check2UnicodeString(const USpoofChecker *sc,
  1708	    const icu::UnicodeString &id,
  1709	    USpoofCheckResult* checkResult,
  1710	    UErrorCode *status);
  1711	
  1712	/**
  1713	 * A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
  1714	 *
  1715	 * @param sc      The USpoofChecker
  1716	 * @param s1     The first of the two identifiers to be compared for
  1717	 *                confusability, passed as a UnicodeString.
  1718	 * @param s2     The second of the two identifiers to be compared for
  1719	 *                confusability, passed as a UnicodeString.
  1720	 * @param status  The error code, set if an error occurred while attempting to
  1721	 *                perform the check.
  1722	 *                Confusability of the identifiers is not reported here,
  1723	 *                but through this function's return value.
  1724	 * @return        An integer value with bit(s) set corresponding to
  1725	 *                the type of confusability found, as defined by
  1726	 *                enum USpoofChecks.  Zero is returned if the identifiers
  1727	 *                are not confusable.
  1728	 *
  1729	 * @stable ICU 4.2
  1730	 *
  1731	 * @see uspoof_areConfusable
  1732	 */
  1733	U_CAPI int32_t U_EXPORT2
  1734	uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
  1735	                                  const icu::UnicodeString &s1,
  1736	                                  const icu::UnicodeString &s2,
  1737	                                  UErrorCode *status);
  1738	
  1739	#ifndef U_HIDE_DRAFT_API
  1740	/**
  1741	 * A version of {@link uspoof_areBidiConfusable} accepting UnicodeStrings.
  1742	 *
  1743	 * @param sc      The USpoofChecker
  1744	 * @param direction The paragraph direction with which the identifiers are
  1745	 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
  1746	 * @param s1     The first of the two identifiers to be compared for
  1747	 *                confusability, passed as a UnicodeString.
  1748	 * @param s2     The second of the two identifiers to be compared for
  1749	 *                confusability, passed as a UnicodeString.
  1750	 * @param status  The error code, set if an error occurred while attempting to
  1751	 *                perform the check.
  1752	 *                Confusability of the identifiers is not reported here,
  1753	 *                but through this function's return value.
  1754	 * @return        An integer value with bit(s) set corresponding to
  1755	 *                the type of confusability found, as defined by
  1756	 *                enum USpoofChecks.  Zero is returned if the identifiers
  1757	 *                are not confusable.
  1758	 *
  1759	 * @draft ICU 74
  1760	 *
  1761	 * @see uspoof_areBidiConfusable
  1762	 */
  1763	U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
  1764	                                                               UBiDiDirection direction,
  1765	                                                               const icu::UnicodeString &s1,
  1766	                                                               const icu::UnicodeString &s2,
  1767	                                                               UErrorCode *status);
  1768	#endif /* U_HIDE_DRAFT_API */
  1769	
  1770	/**
  1771	 *  Get the "skeleton" for an identifier.
  1772	 *  Skeletons are a transformation of the input identifier;
  1773	 *  Two identifiers are confusable if their skeletons are identical.
  1774	 *  See Unicode Technical Standard #39 for additional information.
  1775	 *
  1776	 *  Using skeletons directly makes it possible to quickly check
  1777	 *  whether an identifier is confusable with any of some large
  1778	 *  set of existing identifiers, by creating an efficiently
  1779	 *  searchable collection of the skeletons.
  1780	 *
  1781	 * @param sc      The USpoofChecker.
  1782	 * @param type    Deprecated in ICU 58.  You may pass any number.
  1783	 *                Originally, controlled which of the Unicode confusable data
  1784	 *                tables to use.
  1785	 * @param id      The input identifier whose skeleton will be computed.
  1786	 * @param dest    The output identifier, to receive the skeleton string.
  1787	 * @param status  The error code, set if an error occurred while attempting to
  1788	 *                perform the check.
  1789	 * @return        A reference to the destination (skeleton) string.
  1790	 *
  1791	 * @stable ICU 4.2
  1792	 */
  1793	U_I18N_API icu::UnicodeString & U_EXPORT2
  1794	uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
  1795	                                uint32_t type,
  1796	                                const icu::UnicodeString &id,
  1797	                                icu::UnicodeString &dest,
  1798	                                UErrorCode *status);
  1799	
  1800	#ifndef U_HIDE_DRAFT_API
  1801	/**
  1802	 *  Get the "bidiSkeleton" for an identifier and a direction.
  1803	 *  Skeletons are a transformation of the input identifier;
  1804	 *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
  1805	 *  they are RTL-confusable if their RTL bidiSkeletons are identical.
  1806	 *  See Unicode Technical Standard #39 for additional information:
  1807	 *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
  1808	 *
  1809	 *  Using skeletons directly makes it possible to quickly check
  1810	 *  whether an identifier is confusable with any of some large
  1811	 *  set of existing identifiers, by creating an efficiently
  1812	 *  searchable collection of the skeletons.
  1813	 *
  1814	 * @param sc      The USpoofChecker.
  1815	 * @param direction The context direction with which the identifier will be
  1816	 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
  1817	 * @param id      The input identifier whose bidiSkeleton will be computed.
  1818	 * @param dest    The output identifier, to receive the skeleton string.
  1819	 * @param status  The error code, set if an error occurred while attempting to
  1820	 *                perform the check.
  1821	 * @return        A reference to the destination (skeleton) string.
  1822	 *
  1823	 * @draft ICU 74
  1824	 */
  1825	U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(
  1826	    const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id,
  1827	    icu::UnicodeString &dest, UErrorCode *status);
  1828	#endif /* U_HIDE_DRAFT_API */
  1829	
  1830	/**
  1831	  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
  1832	  * in http://unicode.org/Public/security/latest/xidmodifications.txt
  1833	  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
  1834	  *
  1835	  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
  1836	  * be deleted by the caller.
  1837	  *
  1838	  * @param status The error code, set if a problem occurs while creating the set.
  1839	  *
  1840	  * @stable ICU 51
  1841	  */
  1842	U_CAPI const icu::UnicodeSet * U_EXPORT2
  1843	uspoof_getInclusionUnicodeSet(UErrorCode *status);
  1844	
  1845	/**
  1846	  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
  1847	  * in http://unicode.org/Public/security/latest/xidmodifications.txt
  1848	  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
  1849	  *
  1850	  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
  1851	  * be deleted by the caller.
  1852	  *
  1853	  * @param status The error code, set if a problem occurs while creating the set.
  1854	  *
  1855	  * @stable ICU 51
  1856	  */
  1857	U_CAPI const icu::UnicodeSet * U_EXPORT2
  1858	uspoof_getRecommendedUnicodeSet(UErrorCode *status);
  1859	
  1860	#endif /* U_SHOW_CPLUSPLUS_API */
  1861	
  1862	#endif /* UCONFIG_NO_NORMALIZATION */
  1863	
  1864	#endif   /* USPOOF_H */