Where Online Learning is simpler!
The C and C++ Include Header Files
/usr/include/unicode/unorm2.h
$ cat -n /usr/include/unicode/unorm2.h 1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2009-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: unorm2.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2009dec15 16 * created by: Markus W. Scherer 17 */ 18 19 #ifndef __UNORM2_H__ 20 #define __UNORM2_H__ 21 22 /** 23 * \file 24 * \brief C API: New API for Unicode Normalization. 25 * 26 * Unicode normalization functionality for standard Unicode normalization or 27 * for using custom mapping tables. 28 * All instances of UNormalizer2 are unmodifiable/immutable. 29 * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. 30 * For more details see the Normalizer2 C++ class. 31 */ 32 33 #include "unicode/utypes.h" 34 #include "unicode/stringoptions.h" 35 #include "unicode/uset.h" 36 37 #if U_SHOW_CPLUSPLUS_API 38 #include "unicode/localpointer.h" 39 #endif // U_SHOW_CPLUSPLUS_API 40 41 /** 42 * Constants for normalization modes. 43 * For details about standard Unicode normalization forms 44 * and about the algorithms which are also used with custom mapping tables 45 * see http://www.unicode.org/unicode/reports/tr15/ 46 * @stable ICU 4.4 47 */ 48 typedef enum { 49 /** 50 * Decomposition followed by composition. 51 * Same as standard NFC when using an "nfc" instance. 52 * Same as standard NFKC when using an "nfkc" instance. 53 * For details about standard Unicode normalization forms 54 * see http://www.unicode.org/unicode/reports/tr15/ 55 * @stable ICU 4.4 56 */ 57 UNORM2_COMPOSE, 58 /** 59 * Map, and reorder canonically. 60 * Same as standard NFD when using an "nfc" instance. 
61 * Same as standard NFKD when using an "nfkc" instance. 62 * For details about standard Unicode normalization forms 63 * see http://www.unicode.org/unicode/reports/tr15/ 64 * @stable ICU 4.4 65 */ 66 UNORM2_DECOMPOSE, 67 /** 68 * "Fast C or D" form. 69 * If a string is in this form, then further decomposition
without reordering
70 * would yield the same form as DECOMPOSE. 71 * Text in "Fast C or D" form can be processed efficiently with data tables 72 * that are "canonically closed", that is, that provide equivalent data for 73 * equivalent text, without having to be fully normalized. 74 * Not a standard Unicode normalization form. 75 * Not a unique form: Different FCD strings can be canonically equivalent. 76 * For details see http://www.unicode.org/notes/tn5/#FCD 77 * @stable ICU 4.4 78 */ 79 UNORM2_FCD, 80 /** 81 * Compose only contiguously. 82 * Also known as "FCC" or "Fast C Contiguous". 83 * The result will often but not always be in NFC. 84 * The result will conform to FCD which is useful for processing. 85 * Not a standard Unicode normalization form. 86 * For details see http://www.unicode.org/notes/tn5/#FCC 87 * @stable ICU 4.4 88 */ 89 UNORM2_COMPOSE_CONTIGUOUS 90 } UNormalization2Mode; 91 92 /** 93 * Result values for normalization quick check functions. 94 * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms 95 * @stable ICU 2.0 96 */ 97 typedef enum UNormalizationCheckResult { 98 /** 99 * The input string is not in the normalization form. 100 * @stable ICU 2.0 101 */ 102 UNORM_NO, 103 /** 104 * The input string is in the normalization form. 105 * @stable ICU 2.0 106 */ 107 UNORM_YES, 108 /** 109 * The input string may or may not be in the normalization form. 110 * This value is only returned for composition forms like NFC and FCC, 111 * when a backward-combining character is found for which the surrounding text 112 * would have to be analyzed further. 113 * @stable ICU 2.0 114 */ 115 UNORM_MAYBE 116 } UNormalizationCheckResult; 117 118 /** 119 * Opaque C service object type for the new normalization API. 120 * @stable ICU 4.4 121 */ 122 struct UNormalizer2; 123 typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. 
@stable ICU 4.4 */ 124 125 #if !UCONFIG_NO_NORMALIZATION 126 127 /** 128 * Returns a UNormalizer2 instance for Unicode NFC normalization. 129 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). 130 * Returns an unmodifiable singleton instance. Do not delete it. 131 * @param pErrorCode Standard ICU error code. Its input value must 132 * pass the U_SUCCESS() test, or else the function returns 133 * immediately. Check for U_FAILURE() on output or use with 134 * function chaining. (See User Guide for details.) 135 * @return the requested Normalizer2, if successful 136 * @stable ICU 49 137 */ 138 U_CAPI const UNormalizer2 * U_EXPORT2 139 unorm2_getNFCInstance(UErrorCode *pErrorCode); 140 141 /** 142 * Returns a UNormalizer2 instance for Unicode NFD normalization. 143 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). 144 * Returns an unmodifiable singleton instance. Do not delete it. 145 * @param pErrorCode Standard ICU error code. Its input value must 146 * pass the U_SUCCESS() test, or else the function returns 147 * immediately. Check for U_FAILURE() on output or use with 148 * function chaining. (See User Guide for details.) 149 * @return the requested Normalizer2, if successful 150 * @stable ICU 49 151 */ 152 U_CAPI const UNormalizer2 * U_EXPORT2 153 unorm2_getNFDInstance(UErrorCode *pErrorCode); 154 155 /** 156 * Returns a UNormalizer2 instance for Unicode NFKC normalization. 157 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). 158 * Returns an unmodifiable singleton instance. Do not delete it. 159 * @param pErrorCode Standard ICU error code. Its input value must 160 * pass the U_SUCCESS() test, or else the function returns 161 * immediately. Check for U_FAILURE() on output or use with 162 * function chaining. (See User Guide for details.) 
163 * @return the requested Normalizer2, if successful 164 * @stable ICU 49 165 */ 166 U_CAPI const UNormalizer2 * U_EXPORT2 167 unorm2_getNFKCInstance(UErrorCode *pErrorCode); 168 169 /** 170 * Returns a UNormalizer2 instance for Unicode NFKD normalization. 171 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). 172 * Returns an unmodifiable singleton instance. Do not delete it. 173 * @param pErrorCode Standard ICU error code. Its input value must 174 * pass the U_SUCCESS() test, or else the function returns 175 * immediately. Check for U_FAILURE() on output or use with 176 * function chaining. (See User Guide for details.) 177 * @return the requested Normalizer2, if successful 178 * @stable ICU 49 179 */ 180 U_CAPI const UNormalizer2 * U_EXPORT2 181 unorm2_getNFKDInstance(UErrorCode *pErrorCode); 182 183 /** 184 * Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization 185 * which is equivalent to applying the NFKC_Casefold mappings and then NFC. 186 * See https://www.unicode.org/reports/tr44/#NFKC_Casefold 187 * 188 * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). 189 * Returns an unmodifiable singleton instance. Do not delete it. 190 * @param pErrorCode Standard ICU error code. Its input value must 191 * pass the U_SUCCESS() test, or else the function returns 192 * immediately. Check for U_FAILURE() on output or use with 193 * function chaining. (See User Guide for details.) 194 * @return the requested Normalizer2, if successful 195 * @stable ICU 49 196 */ 197 U_CAPI const UNormalizer2 * U_EXPORT2 198 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); 199 200 #ifndef U_HIDE_DRAFT_API 201 /** 202 * Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization 203 * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC. 
204 * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold 205 * 206 * Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode). 207 * Returns an unmodifiable singleton instance. Do not delete it. 208 * @param pErrorCode Standard ICU error code. Its input value must 209 * pass the U_SUCCESS() test, or else the function returns 210 * immediately. Check for U_FAILURE() on output or use with 211 * function chaining. (See User Guide for details.) 212 * @return the requested Normalizer2, if successful 213 * @draft ICU 74 214 */ 215 U_CAPI const UNormalizer2 * U_EXPORT2 216 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode); 217 #endif // U_HIDE_DRAFT_API 218 219 /** 220 * Returns a UNormalizer2 instance which uses the specified data file 221 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 222 * and which composes or decomposes text according to the specified mode. 223 * Returns an unmodifiable singleton instance. Do not delete it. 224 * 225 * Use packageName=NULL for data files that are part of ICU's own data. 226 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 227 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 228 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 229 * 230 * @param packageName NULL for ICU built-in data, otherwise application data package name 231 * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file 232 * @param mode normalization mode (compose or decompose etc.) 233 * @param pErrorCode Standard ICU error code. Its input value must 234 * pass the U_SUCCESS() test, or else the function returns 235 * immediately. Check for U_FAILURE() on output or use with 236 * function chaining. (See User Guide for details.) 
237 * @return the requested UNormalizer2, if successful 238 * @stable ICU 4.4 239 */ 240 U_CAPI const UNormalizer2 * U_EXPORT2 241 unorm2_getInstance(const char *packageName, 242 const char *name, 243 UNormalization2Mode mode, 244 UErrorCode *pErrorCode); 245 246 /** 247 * Constructs a filtered normalizer wrapping any UNormalizer2 instance 248 * and a filter set. 249 * Both are aliased and must not be modified or deleted while this object 250 * is used. 251 * The filter set should be frozen; otherwise the performance will suffer greatly. 252 * @param norm2 wrapped UNormalizer2 instance 253 * @param filterSet USet which determines the characters to be normalized 254 * @param pErrorCode Standard ICU error code. Its input value must 255 * pass the U_SUCCESS() test, or else the function returns 256 * immediately. Check for U_FAILURE() on output or use with 257 * function chaining. (See User Guide for details.) 258 * @return the requested UNormalizer2, if successful 259 * @stable ICU 4.4 260 */ 261 U_CAPI UNormalizer2 * U_EXPORT2 262 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); 263 264 /** 265 * Closes a UNormalizer2 instance from unorm2_openFiltered(). 266 * Do not close instances from unorm2_getInstance()! 267 * @param norm2 UNormalizer2 instance to be closed 268 * @stable ICU 4.4 269 */ 270 U_CAPI void U_EXPORT2 271 unorm2_close(UNormalizer2 *norm2); 272 273 #if U_SHOW_CPLUSPLUS_API 274 275 U_NAMESPACE_BEGIN 276 277 /** 278 * \class LocalUNormalizer2Pointer 279 * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). 280 * For most methods see the LocalPointerBase base class. 
281 * 282 * @see LocalPointerBase 283 * @see LocalPointer 284 * @stable ICU 4.4 285 */ 286 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); 287 288 U_NAMESPACE_END 289 290 #endif 291 292 /** 293 * Writes the normalized form of the source string to the destination string 294 * (replacing its contents) and returns the length of the destination string. 295 * The source and destination strings must be different buffers. 296 * @param norm2 UNormalizer2 instance 297 * @param src source string 298 * @param length length of the source string, or -1 if NUL-terminated 299 * @param dest destination string; its contents are replaced with normalized src 300 * @param capacity number of UChars that can be written to dest 301 * @param pErrorCode Standard ICU error code. Its input value must 302 * pass the U_SUCCESS() test, or else the function returns 303 * immediately. Check for U_FAILURE() on output or use with 304 * function chaining. (See User Guide for details.) 305 * @return the length of the destination string 306 * @stable ICU 4.4 307 */ 308 U_CAPI int32_t U_EXPORT2 309 unorm2_normalize(const UNormalizer2 *norm2, 310 const UChar *src, int32_t length, 311 UChar *dest, int32_t capacity, 312 UErrorCode *pErrorCode); 313 /** 314 * Appends the normalized form of the second string to the first string 315 * (merging them at the boundary) and returns the length of the first string. 316 * The result is normalized if the first string was normalized. 317 * The first and second strings must be different buffers. 318 * @param norm2 UNormalizer2 instance 319 * @param first string, should be normalized 320 * @param firstLength length of the first string, or -1 if NUL-terminated 321 * @param firstCapacity number of UChars that can be written to first 322 * @param second string, will be normalized 323 * @param secondLength length of the second string, or -1 if NUL-terminated 324 * @param pErrorCode Standard ICU error code. 
Its input value must 325 * pass the U_SUCCESS() test, or else the function returns 326 * immediately. Check for U_FAILURE() on output or use with 327 * function chaining. (See User Guide for details.) 328 * @return the length of the first string 329 * @stable ICU 4.4 330 */ 331 U_CAPI int32_t U_EXPORT2 332 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 333 UChar *first, int32_t firstLength, int32_t firstCapacity, 334 const UChar *second, int32_t secondLength, 335 UErrorCode *pErrorCode); 336 /** 337 * Appends the second string to the first string 338 * (merging them at the boundary) and returns the length of the first string. 339 * The result is normalized if both strings were normalized. 340 * The first and second strings must be different buffers. 341 * @param norm2 UNormalizer2 instance 342 * @param first string, should be normalized 343 * @param firstLength length of the first string, or -1 if NUL-terminated 344 * @param firstCapacity number of UChars that can be written to first 345 * @param second string, should be normalized 346 * @param secondLength length of the second string, or -1 if NUL-terminated 347 * @param pErrorCode Standard ICU error code. Its input value must 348 * pass the U_SUCCESS() test, or else the function returns 349 * immediately. Check for U_FAILURE() on output or use with 350 * function chaining. (See User Guide for details.) 351 * @return the length of the first string 352 * @stable ICU 4.4 353 */ 354 U_CAPI int32_t U_EXPORT2 355 unorm2_append(const UNormalizer2 *norm2, 356 UChar *first, int32_t firstLength, int32_t firstCapacity, 357 const UChar *second, int32_t secondLength, 358 UErrorCode *pErrorCode); 359 360 /** 361 * Gets the decomposition mapping of c. 362 * Roughly equivalent to normalizing the String form of c 363 * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function 364 * returns a negative value and does not write a string 365 * if c does not have a decomposition mapping in this instance's data. 
366 * This function is independent of the mode of the UNormalizer2. 367 * @param norm2 UNormalizer2 instance 368 * @param c code point 369 * @param decomposition String buffer which will be set to c's 370 * decomposition mapping, if there is one. 371 * @param capacity number of UChars that can be written to decomposition 372 * @param pErrorCode Standard ICU error code. Its input value must 373 * pass the U_SUCCESS() test, or else the function returns 374 * immediately. Check for U_FAILURE() on output or use with 375 * function chaining. (See User Guide for details.) 376 * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value 377 * @stable ICU 4.6 378 */ 379 U_CAPI int32_t U_EXPORT2 380 unorm2_getDecomposition(const UNormalizer2 *norm2, 381 UChar32 c, UChar *decomposition, int32_t capacity, 382 UErrorCode *pErrorCode); 383 384 /** 385 * Gets the raw decomposition mapping of c. 386 * 387 * This is similar to the unorm2_getDecomposition() function but returns the 388 * raw decomposition mapping as specified in UnicodeData.txt or 389 * (for custom data) in the mapping files processed by the gennorm2 tool. 390 * By contrast, unorm2_getDecomposition() returns the processed, 391 * recursively-decomposed version of this mapping. 392 * 393 * When used on a standard NFKC Normalizer2 instance, 394 * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 395 * 396 * When used on a standard NFC Normalizer2 instance, 397 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 398 * in this case, the result contains either one or two code points (=1..4 UChars). 399 * 400 * This function is independent of the mode of the UNormalizer2. 401 * @param norm2 UNormalizer2 instance 402 * @param c code point 403 * @param decomposition String buffer which will be set to c's 404 * raw decomposition mapping, if there is one. 
405 * @param capacity number of UChars that can be written to decomposition 406 * @param pErrorCode Standard ICU error code. Its input value must 407 * pass the U_SUCCESS() test, or else the function returns 408 * immediately. Check for U_FAILURE() on output or use with 409 * function chaining. (See User Guide for details.) 410 * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value 411 * @stable ICU 49 412 */ 413 U_CAPI int32_t U_EXPORT2 414 unorm2_getRawDecomposition(const UNormalizer2 *norm2, 415 UChar32 c, UChar *decomposition, int32_t capacity, 416 UErrorCode *pErrorCode); 417 418 /** 419 * Performs pairwise composition of a & b and returns the composite if there is one. 420 * 421 * Returns a composite code point c only if c has a two-way mapping to a+b. 422 * In standard Unicode normalization, this means that 423 * c has a canonical decomposition to a+b 424 * and c does not have the Full_Composition_Exclusion property. 425 * 426 * This function is independent of the mode of the UNormalizer2. 427 * @param norm2 UNormalizer2 instance 428 * @param a A (normalization starter) code point. 429 * @param b Another code point. 430 * @return The non-negative composite code point if there is one; otherwise a negative value. 431 * @stable ICU 49 432 */ 433 U_CAPI UChar32 U_EXPORT2 434 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); 435 436 /** 437 * Gets the combining class of c. 438 * The default implementation returns 0 439 * but all standard implementations return the Unicode Canonical_Combining_Class value. 440 * @param norm2 UNormalizer2 instance 441 * @param c code point 442 * @return c's combining class 443 * @stable ICU 49 444 */ 445 U_CAPI uint8_t U_EXPORT2 446 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); 447 448 /** 449 * Tests if the string is normalized. 
450 * Internally, in cases where the unorm2_quickCheck() function would return "maybe" 451 * (which is only possible for the two COMPOSE modes) this function 452 * resolves to "yes" or "no" to provide a definitive result, 453 * at the cost of doing more work in those cases. 454 * @param norm2 UNormalizer2 instance 455 * @param s input string 456 * @param length length of the string, or -1 if NUL-terminated 457 * @param pErrorCode Standard ICU error code. Its input value must 458 * pass the U_SUCCESS() test, or else the function returns 459 * immediately. Check for U_FAILURE() on output or use with 460 * function chaining. (See User Guide for details.) 461 * @return true if s is normalized 462 * @stable ICU 4.4 463 */ 464 U_CAPI UBool U_EXPORT2 465 unorm2_isNormalized(const UNormalizer2 *norm2, 466 const UChar *s, int32_t length, 467 UErrorCode *pErrorCode); 468 469 /** 470 * Tests if the string is normalized. 471 * For the two COMPOSE modes, the result could be "maybe" in cases that 472 * would take a little more work to resolve definitively. 473 * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster 474 * combination of quick check + normalization, to avoid 475 * re-checking the "yes" prefix. 476 * @param norm2 UNormalizer2 instance 477 * @param s input string 478 * @param length length of the string, or -1 if NUL-terminated 479 * @param pErrorCode Standard ICU error code. Its input value must 480 * pass the U_SUCCESS() test, or else the function returns 481 * immediately. Check for U_FAILURE() on output or use with 482 * function chaining. (See User Guide for details.) 483 * @return UNormalizationCheckResult 484 * @stable ICU 4.4 485 */ 486 U_CAPI UNormalizationCheckResult U_EXPORT2 487 unorm2_quickCheck(const UNormalizer2 *norm2, 488 const UChar *s, int32_t length, 489 UErrorCode *pErrorCode); 490 491 /** 492 * Returns the end of the normalized substring of the input string. 493 * In other words, with
end=spanQuickCheckYes(s, ec);
494 * the substring
UnicodeString(s, 0, end)
495 * will pass the quick check with a "yes" result. 496 * 497 * The returned end index is usually one or more characters before the 498 * "no" or "maybe" character: The end index is at a normalization boundary. 499 * (See the class documentation for more about normalization boundaries.) 500 * 501 * When the goal is a normalized string and most input strings are expected 502 * to be normalized already, then call this method, 503 * and if it returns a prefix shorter than the input string, 504 * copy that prefix and use normalizeSecondAndAppend() for the remainder. 505 * @param norm2 UNormalizer2 instance 506 * @param s input string 507 * @param length length of the string, or -1 if NUL-terminated 508 * @param pErrorCode Standard ICU error code. Its input value must 509 * pass the U_SUCCESS() test, or else the function returns 510 * immediately. Check for U_FAILURE() on output or use with 511 * function chaining. (See User Guide for details.) 512 * @return "yes" span end index 513 * @stable ICU 4.4 514 */ 515 U_CAPI int32_t U_EXPORT2 516 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 517 const UChar *s, int32_t length, 518 UErrorCode *pErrorCode); 519 520 /** 521 * Tests if the character always has a normalization boundary before it, 522 * regardless of context. 523 * For details see the Normalizer2 base class documentation. 524 * @param norm2 UNormalizer2 instance 525 * @param c character to test 526 * @return true if c has a normalization boundary before it 527 * @stable ICU 4.4 528 */ 529 U_CAPI UBool U_EXPORT2 530 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); 531 532 /** 533 * Tests if the character always has a normalization boundary after it, 534 * regardless of context. 535 * For details see the Normalizer2 base class documentation. 
536 * @param norm2 UNormalizer2 instance 537 * @param c character to test 538 * @return true if c has a normalization boundary after it 539 * @stable ICU 4.4 540 */ 541 U_CAPI UBool U_EXPORT2 542 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); 543 544 /** 545 * Tests if the character is normalization-inert. 546 * For details see the Normalizer2 base class documentation. 547 * @param norm2 UNormalizer2 instance 548 * @param c character to test 549 * @return true if c is normalization-inert 550 * @stable ICU 4.4 551 */ 552 U_CAPI UBool U_EXPORT2 553 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); 554 555 /** 556 * Compares two strings for canonical equivalence. 557 * Further options include case-insensitive comparison and 558 * code point order (as opposed to code unit order). 559 * 560 * Canonical equivalence between two strings is defined as their normalized 561 * forms (NFD or NFC) being identical. 562 * This function compares strings incrementally instead of normalizing 563 * (and optionally case-folding) both strings entirely, 564 * improving performance significantly. 565 * 566 * Bulk normalization is only necessary if the strings do not fulfill the FCD 567 * conditions. Only in this case, and only if the strings are relatively long, 568 * is memory allocated temporarily. 569 * For FCD strings and short non-FCD strings there is no memory allocation. 570 * 571 * Semantically, this is equivalent to 572 * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) 573 * where code point order and foldCase are all optional. 574 * 575 * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match 576 * the case folding must be performed first, then the normalization. 577 * 578 * @param s1 First source string. 579 * @param length1 Length of first source string, or -1 if NUL-terminated. 580 * 581 * @param s2 Second source string. 582 * @param length2 Length of second source string, or -1 if NUL-terminated. 
583 * 584 * @param options A bit set of options: 585 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 586 * Case-sensitive comparison in code unit order, and the input strings 587 * are quick-checked for FCD. 588 * 589 * - UNORM_INPUT_IS_FCD 590 * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. 591 * If not set, the function will quickCheck for FCD 592 * and normalize if necessary. 593 * 594 * - U_COMPARE_CODE_POINT_ORDER 595 * Set to choose code point order instead of code unit order 596 * (see u_strCompare for details). 597 * 598 * - U_COMPARE_IGNORE_CASE 599 * Set to compare strings case-insensitively using case folding, 600 * instead of case-sensitively. 601 * If set, then the following case folding options are used. 602 * 603 * - Options as used with case-insensitive comparisons, currently: 604 * 605 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 606 * (see u_strCaseCompare for details) 607 * 608 * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT 609 * 610 * @param pErrorCode ICU error code in/out parameter. 611 * Must fulfill U_SUCCESS before the function call. 612 * @return <0 or 0 or >0 as usual for string comparisons 613 * 614 * @see unorm_normalize 615 * @see UNORM_FCD 616 * @see u_strCompare 617 * @see u_strCaseCompare 618 * 619 * @stable ICU 2.2 620 */ 621 U_CAPI int32_t U_EXPORT2 622 unorm_compare(const UChar *s1, int32_t length1, 623 const UChar *s2, int32_t length2, 624 uint32_t options, 625 UErrorCode *pErrorCode); 626 627 #endif /* !UCONFIG_NO_NORMALIZATION */ 628 #endif /* __UNORM2_H__ */
Contact us
|
About us
|
Terms of use
|
Copyright © 2000-2025 MyWebUniversity.com ™