Normalizer
154 * @param str The string to be normalized. The normalization 155 * will start at the beginning of the string. 156 * 157 * @param mode The normalization mode. 158 * @deprecated ICU 56 Use Normalizer2 instead. 159 */ 160 Normalizer(const UnicodeString& str, UNormalizationMode mode); 161 162 /** 163 * Creates a new Normalizer object for iterating over the 164 * normalized form of a given string. 165 *
166 * @param str The string to be normalized. The normalization 167 * will start at the beginning of the string. 168 * 169 * @param length Length of the string, or -1 if NUL-terminated. 170 * @param mode The normalization mode. 171 * @deprecated ICU 56 Use Normalizer2 instead. 172 */ 173 Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode); 174 175 /** 176 * Creates a new Normalizer object for iterating over the 177 * normalized form of the given text. 178 *
179 * @param iter The input text to be normalized. The normalization 180 * will start at the beginning of the string. 181 * 182 * @param mode The normalization mode. 183 * @deprecated ICU 56 Use Normalizer2 instead. 184 */ 185 Normalizer(const CharacterIterator& iter, UNormalizationMode mode); 186 #endif /* U_HIDE_DEPRECATED_API */ 187 188 #ifndef U_FORCE_HIDE_DEPRECATED_API 189 /** 190 * Copy constructor. 191 * @param copy The object to be copied. 192 * @deprecated ICU 56 Use Normalizer2 instead. 193 */ 194 Normalizer(const Normalizer& copy); 195 196 /** 197 * Destructor 198 * @deprecated ICU 56 Use Normalizer2 instead. 199 */ 200 virtual ~Normalizer(); 201 #endif // U_FORCE_HIDE_DEPRECATED_API 202 203 //------------------------------------------------------------------------- 204 // Static utility methods 205 //------------------------------------------------------------------------- 206 207 #ifndef U_HIDE_DEPRECATED_API 208 /** 209 * Normalizes a UnicodeString according to the specified normalization mode. 210 * This is a wrapper for unorm_normalize(), using UnicodeString's. 211 * 212 * The options parameter specifies which optional 213 * Normalizer features are to be enabled for this operation. 214 * 215 * @param source the input string to be normalized. 216 * @param mode the normalization mode 217 * @param options the optional features to be enabled (0 for no options) 218 * @param result The normalized string (on output). 219 * @param status The error code. 220 * @deprecated ICU 56 Use Normalizer2 instead. 221 */ 222 static void U_EXPORT2 normalize(const UnicodeString& source, 223 UNormalizationMode mode, int32_t options, 224 UnicodeString& result, 225 UErrorCode &status); 226 227 /** 228 * Compose a UnicodeString. 229 * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC. 230 * This is a wrapper for unorm_normalize(), using UnicodeString's. 
231 * 232 * The options parameter specifies which optional 233 * Normalizer features are to be enabled for this operation. 234 * 235 * @param source the string to be composed. 236 * @param compat Perform compatibility decomposition before composition. 237 * If this argument is false, only canonical 238 * decomposition will be performed. 239 * @param options the optional features to be enabled (0 for no options) 240 * @param result The composed string (on output). 241 * @param status The error code. 242 * @deprecated ICU 56 Use Normalizer2 instead. 243 */ 244 static void U_EXPORT2 compose(const UnicodeString& source, 245 UBool compat, int32_t options, 246 UnicodeString& result, 247 UErrorCode &status); 248 249 /** 250 * Static method to decompose a UnicodeString. 251 * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD. 252 * This is a wrapper for unorm_normalize(), using UnicodeString's. 253 * 254 * The options parameter specifies which optional 255 * Normalizer features are to be enabled for this operation. 256 * 257 * @param source the string to be decomposed. 258 * @param compat Perform compatibility decomposition. 259 * If this argument is false, only canonical 260 * decomposition will be performed. 261 * @param options the optional features to be enabled (0 for no options) 262 * @param result The decomposed string (on output). 263 * @param status The error code. 264 * @deprecated ICU 56 Use Normalizer2 instead. 265 */ 266 static void U_EXPORT2 decompose(const UnicodeString& source, 267 UBool compat, int32_t options, 268 UnicodeString& result, 269 UErrorCode &status); 270 271 /** 272 * Performing quick check on a string, to quickly determine if the string is 273 * in a particular normalization format. 274 * This is a wrapper for unorm_quickCheck(), using a UnicodeString. 275 * 276 * Three types of result can be returned UNORM_YES, UNORM_NO or 277 * UNORM_MAYBE. 
Result UNORM_YES indicates that the argument 278 * string is in the desired normalized format, UNORM_NO determines that 279 * argument string is not in the desired normalized format. A 280 * UNORM_MAYBE result indicates that a more thorough check is required, 281 * the user may have to put the string in its normalized form and compare the 282 * results. 283 * @param source string for determining if it is in a normalized format 284 * @param mode normalization format 285 * @param status A reference to a UErrorCode to receive any errors 286 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 287 * 288 * @see isNormalized 289 * @deprecated ICU 56 Use Normalizer2 instead. 290 */ 291 static inline UNormalizationCheckResult 292 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); 293 294 /** 295 * Performing quick check on a string; same as the other version of quickCheck 296 * but takes an extra options parameter like most normalization functions. 297 * 298 * @param source string for determining if it is in a normalized format 299 * @param mode normalization format 300 * @param options the optional features to be enabled (0 for no options) 301 * @param status A reference to a UErrorCode to receive any errors 302 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 303 * 304 * @see isNormalized 305 * @deprecated ICU 56 Use Normalizer2 instead. 306 */ 307 static UNormalizationCheckResult 308 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); 309 310 /** 311 * Test if a string is in a given normalization form. 312 * This is semantically equivalent to source.equals(normalize(source, mode)) . 313 * 314 * Unlike unorm_quickCheck(), this function returns a definitive result, 315 * never a "maybe". 316 * For NFD, NFKD, and FCD, both functions work exactly the same. 317 * For NFC and NFKC where quickCheck may return "maybe", this function will 318 * perform further tests to arrive at a true/false result. 
319 * 320 * @param src String that is to be tested if it is in a normalization format. 321 * @param mode Which normalization form to test for. 322 * @param errorCode ICU error code in/out parameter. 323 * Must fulfill U_SUCCESS before the function call. 324 * @return Boolean value indicating whether the source string is in the 325 * "mode" normalization form. 326 * 327 * @see quickCheck 328 * @deprecated ICU 56 Use Normalizer2 instead. 329 */ 330 static inline UBool 331 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); 332 333 /** 334 * Test if a string is in a given normalization form; same as the other version of isNormalized 335 * but takes an extra options parameter like most normalization functions. 336 * 337 * @param src String that is to be tested if it is in a normalization format. 338 * @param mode Which normalization form to test for. 339 * @param options the optional features to be enabled (0 for no options) 340 * @param errorCode ICU error code in/out parameter. 341 * Must fulfill U_SUCCESS before the function call. 342 * @return Boolean value indicating whether the source string is in the 343 * "mode" normalization form. 344 * 345 * @see quickCheck 346 * @deprecated ICU 56 Use Normalizer2 instead. 347 */ 348 static UBool 349 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); 350 351 /** 352 * Concatenate normalized strings, making sure that the result is normalized as well. 353 * 354 * If both the left and the right strings are in 355 * the normalization form according to "mode/options", 356 * then the result will be 357 * 358 * \code 359 * dest=normalize(left+right, mode, options) 360 * \endcode 361 * 362 * For details see unorm_concatenate in unorm.h. 363 * 364 * @param left Left source string. 365 * @param right Right source string. 366 * @param result The output string. 367 * @param mode The normalization mode. 
368 * @param options A bit set of normalization options. 369 * @param errorCode ICU error code in/out parameter. 370 * Must fulfill U_SUCCESS before the function call. 371 * @return result 372 * 373 * @see unorm_concatenate 374 * @see normalize 375 * @see unorm_next 376 * @see unorm_previous 377 * 378 * @deprecated ICU 56 Use Normalizer2 instead. 379 */ 380 static UnicodeString & 381 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right, 382 UnicodeString &result, 383 UNormalizationMode mode, int32_t options, 384 UErrorCode &errorCode); 385 #endif /* U_HIDE_DEPRECATED_API */ 386 387 /** 388 * Compare two strings for canonical equivalence. 389 * Further options include case-insensitive comparison and 390 * code point order (as opposed to code unit order). 391 * 392 * Canonical equivalence between two strings is defined as their normalized 393 * forms (NFD or NFC) being identical. 394 * This function compares strings incrementally instead of normalizing 395 * (and optionally case-folding) both strings entirely, 396 * improving performance significantly. 397 * 398 * Bulk normalization is only necessary if the strings do not fulfill the FCD 399 * conditions. Only in this case, and only if the strings are relatively long, 400 * is memory allocated temporarily. 401 * For FCD strings and short non-FCD strings there is no memory allocation. 402 * 403 * Semantically, this is equivalent to 404 * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) 405 * where code point order and foldCase are all optional. 406 * 407 * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match 408 * the case folding must be performed first, then the normalization. 409 * 410 * @param s1 First source string. 411 * @param s2 Second source string. 
412 * 413 * @param options A bit set of options: 414 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 415 * Case-sensitive comparison in code unit order, and the input strings 416 * are quick-checked for FCD. 417 * 418 * - UNORM_INPUT_IS_FCD 419 * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. 420 * If not set, the function will quickCheck for FCD 421 * and normalize if necessary. 422 * 423 * - U_COMPARE_CODE_POINT_ORDER 424 * Set to choose code point order instead of code unit order 425 * (see u_strCompare for details). 426 * 427 * - U_COMPARE_IGNORE_CASE 428 * Set to compare strings case-insensitively using case folding, 429 * instead of case-sensitively. 430 * If set, then the following case folding options are used. 431 * 432 * - Options as used with case-insensitive comparisons, currently: 433 * 434 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 435 * (see u_strCaseCompare for details) 436 * 437 * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT 438 * 439 * @param errorCode ICU error code in/out parameter. 440 * Must fulfill U_SUCCESS before the function call. 441 * @return <0 or 0 or >0 as usual for string comparisons 442 * 443 * @see unorm_compare 444 * @see normalize 445 * @see UNORM_FCD 446 * @see u_strCompare 447 * @see u_strCaseCompare 448 * 449 * @stable ICU 2.2 450 */ 451 static inline int32_t 452 compare(const UnicodeString &s1, const UnicodeString &s2, 453 uint32_t options, 454 UErrorCode &errorCode); 455 456 #ifndef U_HIDE_DEPRECATED_API 457 //------------------------------------------------------------------------- 458 // Iteration API 459 //------------------------------------------------------------------------- 460 461 /** 462 * Return the current character in the normalized text. 463 * current() may need to normalize some text at getIndex(). 464 * The getIndex() is not changed. 465 * 466 * @return the current normalized code point 467 * @deprecated ICU 56 Use Normalizer2 instead. 
468 */ 469 UChar32 current(void); 470 471 /** 472 * Return the first character in the normalized text. 473 * This is equivalent to setIndexOnly(startIndex()) followed by next(). 474 * (Post-increment semantics.) 475 * 476 * @return the first normalized code point 477 * @deprecated ICU 56 Use Normalizer2 instead. 478 */ 479 UChar32 first(void); 480 481 /** 482 * Return the last character in the normalized text. 483 * This is equivalent to setIndexOnly(endIndex()) followed by previous(). 484 * (Pre-decrement semantics.) 485 * 486 * @return the last normalized code point 487 * @deprecated ICU 56 Use Normalizer2 instead. 488 */ 489 UChar32 last(void); 490 491 /** 492 * Return the next character in the normalized text. 493 * (Post-increment semantics.) 494 * If the end of the text has already been reached, DONE is returned. 495 * The DONE value could be confused with a U+FFFF non-character code point 496 * in the text. If this is possible, you can test getIndex() < endIndex() 497 * before calling next(), or (getIndex() < endIndex() || last() != DONE) 498 * after calling next(). (Calling last() will change the iterator state!) 499 * 500 * The C API unorm_next() is more efficient and does not have this ambiguity. 501 * 502 * @return the next normalized code point 503 * @deprecated ICU 56 Use Normalizer2 instead. 504 */ 505 UChar32 next(void); 506 507 /** 508 * Return the previous character in the normalized text and decrement. 509 * (Pre-decrement semantics.) 510 * If the beginning of the text has already been reached, DONE is returned. 511 * The DONE value could be confused with a U+FFFF non-character code point 512 * in the text. If this is possible, you can test 513 * (getIndex() > startIndex() || first()!=DONE). (Calling first() will change 514 * the iterator state!) 515 * 516 * The C API unorm_previous() is more efficient and does not have this ambiguity. 517 * 518 * @return the previous normalized code point 519 * @deprecated ICU 56 Use Normalizer2 instead. 520 */ 521 UChar32 previous(void); 522 523 /** 524 * Set the iteration position in the input text that is being normalized, 525 * without any immediate normalization. 526 * After setIndexOnly(), getIndex() will return the same index that is 527 * specified here. 528 * 529 * @param index the desired index in the input text. 530 * @deprecated ICU 56 Use Normalizer2 instead. 531 */ 532 void setIndexOnly(int32_t index); 533 534 /** 535 * Reset the index to the beginning of the text. 536 * This is equivalent to setIndexOnly(startIndex()). 537 * @deprecated ICU 56 Use Normalizer2 instead. 538 */ 539 void reset(void); 540 541 /** 542 * Retrieve the current iteration position in the input text that is 543 * being normalized. 
544 * 545 * A following call to next() will return a normalized code point from 546 * the input text at or after this index. 547 * 548 * After a call to previous(), getIndex() will point at or before the 549 * position in the input text where the normalized code point 550 * was returned from with previous(). 551 * 552 * @return the current index in the input text 553 * @deprecated ICU 56 Use Normalizer2 instead. 554 */ 555 int32_t getIndex(void) const; 556 557 /** 558 * Retrieve the index of the start of the input text. This is the begin index 559 * of the CharacterIterator or the start (i.e. index 0) of the string 560 * over which this Normalizer is iterating. 561 * 562 * @return the smallest index in the input text where the Normalizer operates 563 * @deprecated ICU 56 Use Normalizer2 instead. 564 */ 565 int32_t startIndex(void) const; 566 567 /** 568 * Retrieve the index of the end of the input text. This is the end index 569 * of the CharacterIterator or the length of the string 570 * over which this Normalizer is iterating. 571 * This end index is exclusive, i.e., the Normalizer operates only on characters 572 * before this index. 573 * 574 * @return the first index in the input text where the Normalizer does not operate 575 * @deprecated ICU 56 Use Normalizer2 instead. 576 */ 577 int32_t endIndex(void) const; 578 579 /** 580 * Returns true when both iterators refer to the same character in the same 581 * input text. 582 * 583 * @param that a Normalizer object to compare this one to 584 * @return comparison result 585 * @deprecated ICU 56 Use Normalizer2 instead. 586 */ 587 bool operator==(const Normalizer& that) const; 588 589 /** 590 * Returns false when both iterators refer to the same character in the same 591 * input text. 592 * 593 * @param that a Normalizer object to compare this one to 594 * @return comparison result 595 * @deprecated ICU 56 Use Normalizer2 instead. 
596 */ 597 inline bool operator!=(const Normalizer& that) const; 598 599 /** 600 * Returns a pointer to a new Normalizer that is a clone of this one. 601 * The caller is responsible for deleting the new clone. 602 * @return a pointer to a new Normalizer 603 * @deprecated ICU 56 Use Normalizer2 instead. 604 */ 605 Normalizer* clone() const; 606 607 /** 608 * Generates a hash code for this iterator. 609 * 610 * @return the hash code 611 * @deprecated ICU 56 Use Normalizer2 instead. 612 */ 613 int32_t hashCode(void) const; 614 615 //------------------------------------------------------------------------- 616 // Property access methods 617 //------------------------------------------------------------------------- 618 619 /** 620 * Set the normalization mode for this object. 621 * 622 * Note: If the normalization mode is changed while iterating 623 * over a string, calls to {@link #next() } and {@link #previous() } may 624 * return previously buffered characters in the old normalization mode 625 * until the iteration is able to re-sync at the next base character. 626 * It is safest to call {@link #setIndexOnly }, {@link #reset() }, 627 * {@link #setText }, {@link #first() }, 628 * {@link #last() }, etc. after calling setMode. 629 * 630 * @param newMode the new mode for this Normalizer. 631 * @see #getUMode 632 * @deprecated ICU 56 Use Normalizer2 instead. 633 */ 634 void setMode(UNormalizationMode newMode); 635 636 /** 637 * Return the normalization mode for this object. 638 * 639 * This is an unusual name because there used to be a getMode() that 640 * returned a different type. 641 * 642 * @return the mode for this Normalizer 643 * @see #setMode 644 * @deprecated ICU 56 Use Normalizer2 instead. 645 */ 646 UNormalizationMode getUMode(void) const; 647 648 /** 649 * Set options that affect this Normalizer's operation. 
650 * Options do not change the basic composition or decomposition operation 651 * that is being performed, but they control whether 652 * certain optional portions of the operation are done. 653 * Currently the only available option is obsolete. 654 * 655 * It is possible to specify multiple options that are all turned on or off. 656 * 657 * @param option the option(s) whose value is/are to be set. 658 * @param value the new setting for the option. Use true to 659 * turn the option(s) on and false to turn it/them off. 660 * 661 * @see #getOption 662 * @deprecated ICU 56 Use Normalizer2 instead. 663 */ 664 void setOption(int32_t option, 665 UBool value); 666 667 /** 668 * Determine whether an option is turned on or off. 669 * If multiple options are specified, then the result is true if any 670 * of them are set. 671 * 672 * @param option the option(s) that are to be checked 673 * @return true if any of the option(s) are set 674 * @see #setOption 675 * @deprecated ICU 56 Use Normalizer2 instead. 676 */ 677 UBool getOption(int32_t option) const; 678 679 /** 680 * Set the input text over which this Normalizer will iterate. 681 * The iteration position is set to the beginning. 682 * 683 * @param newText a string that replaces the current input text 684 * @param status a UErrorCode 685 * @deprecated ICU 56 Use Normalizer2 instead. 686 */ 687 void setText(const UnicodeString& newText, 688 UErrorCode &status); 689 690 /** 691 * Set the input text over which this Normalizer will iterate. 692 * The iteration position is set to the beginning. 693 * 694 * @param newText a CharacterIterator object that replaces the current input text 695 * @param status a UErrorCode 696 * @deprecated ICU 56 Use Normalizer2 instead. 697 */ 698 void setText(const CharacterIterator& newText, 699 UErrorCode &status); 700 701 /** 702 * Set the input text over which this Normalizer will iterate. 703 * The iteration position is set to the beginning. 
704 * 705 * @param newText a string that replaces the current input text 706 * @param length the length of the string, or -1 if NUL-terminated 707 * @param status a UErrorCode 708 * @deprecated ICU 56 Use Normalizer2 instead. 709 */ 710 void setText(ConstChar16Ptr newText, 711 int32_t length, 712 UErrorCode &status); 713 /** 714 * Copies the input text into the UnicodeString argument. 715 * 716 * @param result Receives a copy of the text under iteration. 717 * @deprecated ICU 56 Use Normalizer2 instead. 718 */ 719 void getText(UnicodeString& result); 720 721 /** 722 * ICU "poor man's RTTI", returns a UClassID for this class. 723 * @returns a UClassID for this class. 724 * @deprecated ICU 56 Use Normalizer2 instead. 725 */ 726 static UClassID U_EXPORT2 getStaticClassID(); 727 #endif /* U_HIDE_DEPRECATED_API */ 728 729 #ifndef U_FORCE_HIDE_DEPRECATED_API 730 /** 731 * ICU "poor man's RTTI", returns a UClassID for the actual class. 732 * @return a UClassID for the actual class. 733 * @deprecated ICU 56 Use Normalizer2 instead. 
734 */ 735 virtual UClassID getDynamicClassID() const override; 736 #endif // U_FORCE_HIDE_DEPRECATED_API 737 738 private: 739 //------------------------------------------------------------------------- 740 // Private functions 741 //------------------------------------------------------------------------- 742 743 Normalizer() = delete; // default constructor not implemented 744 Normalizer &operator=(const Normalizer &that) = delete; // assignment operator not implemented 745 746 // Private utility methods for iteration 747 // For documentation, see the source code 748 UBool nextNormalize(); 749 UBool previousNormalize(); 750 751 void init(); 752 void clearBuffer(void); 753 754 //------------------------------------------------------------------------- 755 // Private data 756 //------------------------------------------------------------------------- 757 758 FilteredNormalizer2*fFilteredNorm2; // owned if not nullptr 759 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 760 UNormalizationMode fUMode; // deprecated 761 int32_t fOptions; 762 763 // The input text and our position in it 764 CharacterIterator *text; 765 766 // The normalization buffer is the result of normalization 767 // of the source in [currentIndex..nextIndex[ . 768 int32_t currentIndex, nextIndex; 769 770 // A buffer for holding intermediate results 771 UnicodeString buffer; 772 int32_t bufferPos; 773 }; 774 775 //------------------------------------------------------------------------- 776 // Inline implementations 777 //------------------------------------------------------------------------- 778 779 #ifndef U_HIDE_DEPRECATED_API 780 inline bool 781 Normalizer::operator!= (const Normalizer& other) const 782 { return ! 
operator==(other); } 783 784 inline UNormalizationCheckResult 785 Normalizer::quickCheck(const UnicodeString& source, 786 UNormalizationMode mode, 787 UErrorCode &status) { 788 return quickCheck(source, mode, 0, status); 789 } 790 791 inline UBool 792 Normalizer::isNormalized(const UnicodeString& source, 793 UNormalizationMode mode, 794 UErrorCode &status) { 795 return isNormalized(source, mode, 0, status); 796 } 797 #endif /* U_HIDE_DEPRECATED_API */ 798 799 inline int32_t 800 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, 801 uint32_t options, 802 UErrorCode &errorCode) { 803 // all argument checking is done in unorm_compare 804 return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), 805 toUCharPtr(s2.getBuffer()), s2.length(), 806 options, 807 &errorCode); 808 } 809 810 U_NAMESPACE_END 811 812 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 813 814 #endif // NORMLZR_H 815 816 #endif /* U_SHOW_CPLUSPLUS_API */
UnicodeString
options
false
CharacterIterator
622 * Note: If the normalization mode is changed while iterating 623 * over a string, calls to {@link #next() } and {@link #previous() } may 624 * return previously buffered characters in the old normalization mode 625 * until the iteration is able to re-sync at the next base character. 626 * It is safest to call {@link #setIndexOnly }, {@link #reset() }, 627 * {@link #setText }, {@link #first() }, 628 * {@link #last() }, etc. after calling setMode. 629 *
setMode
630 * @param newMode the new mode for this Normalizer. 631 * @see #getUMode 632 * @deprecated ICU 56 Use Normalizer2 instead. 633 */ 634 void setMode(UNormalizationMode newMode); 635 636 /** 637 * Return the normalization mode for this object. 638 * 639 * This is an unusual name because there used to be a getMode() that 640 * returned a different type. 641 * 642 * @return the mode for this Normalizer 643 * @see #setMode 644 * @deprecated ICU 56 Use Normalizer2 instead. 645 */ 646 UNormalizationMode getUMode(void) const; 647 648 /** 649 * Set options that affect this Normalizer's operation. 650 * Options do not change the basic composition or decomposition operation 651 * that is being performed, but they control whether 652 * certain optional portions of the operation are done. 653 * Currently the only available option is obsolete. 654 * 655 * It is possible to specify multiple options that are all turned on or off. 656 * 657 * @param option the option(s) whose value is/are to be set. 658 * @param value the new setting for the option. Use true to 659 * turn the option(s) on and false to turn it/them off. 660 * 661 * @see #getOption 662 * @deprecated ICU 56 Use Normalizer2 instead. 663 */ 664 void setOption(int32_t option, 665 UBool value); 666 667 /** 668 * Determine whether an option is turned on or off. 669 * If multiple options are specified, then the result is true if any 670 * of them are set. 671 *
true
672 * @param option the option(s) that are to be checked 673 * @return true if any of the option(s) are set 674 * @see #setOption 675 * @deprecated ICU 56 Use Normalizer2 instead. 676 */ 677 UBool getOption(int32_t option) const; 678 679 /** 680 * Set the input text over which this Normalizer will iterate. 681 * The iteration position is set to the beginning. 682 * 683 * @param newText a string that replaces the current input text 684 * @param status a UErrorCode 685 * @deprecated ICU 56 Use Normalizer2 instead. 686 */ 687 void setText(const UnicodeString& newText, 688 UErrorCode &status); 689 690 /** 691 * Set the input text over which this Normalizer will iterate. 692 * The iteration position is set to the beginning. 693 * 694 * @param newText a CharacterIterator object that replaces the current input text 695 * @param status a UErrorCode 696 * @deprecated ICU 56 Use Normalizer2 instead. 697 */ 698 void setText(const CharacterIterator& newText, 699 UErrorCode &status); 700 701 /** 702 * Set the input text over which this Normalizer will iterate. 703 * The iteration position is set to the beginning. 704 * 705 * @param newText a string that replaces the current input text 706 * @param length the length of the string, or -1 if NUL-terminated 707 * @param status a UErrorCode 708 * @deprecated ICU 56 Use Normalizer2 instead. 709 */ 710 void setText(ConstChar16Ptr newText, 711 int32_t length, 712 UErrorCode &status); 713 /** 714 * Copies the input text into the UnicodeString argument. 715 * 716 * @param result Receives a copy of the text under iteration. 717 * @deprecated ICU 56 Use Normalizer2 instead. 718 */ 719 void getText(UnicodeString& result); 720 721 /** 722 * ICU "poor man's RTTI", returns a UClassID for this class. 723 * @returns a UClassID for this class. 724 * @deprecated ICU 56 Use Normalizer2 instead. 
725 */ 726 static UClassID U_EXPORT2 getStaticClassID(); 727 #endif /* U_HIDE_DEPRECATED_API */ 728 729 #ifndef U_FORCE_HIDE_DEPRECATED_API 730 /** 731 * ICU "poor man's RTTI", returns a UClassID for the actual class. 732 * @return a UClassID for the actual class. 733 * @deprecated ICU 56 Use Normalizer2 instead. 734 */ 735 virtual UClassID getDynamicClassID() const override; 736 #endif // U_FORCE_HIDE_DEPRECATED_API 737 738 private: 739 //------------------------------------------------------------------------- 740 // Private functions 741 //------------------------------------------------------------------------- 742 743 Normalizer() = delete; // default constructor not implemented 744 Normalizer &operator=(const Normalizer &that) = delete; // assignment operator not implemented 745 746 // Private utility methods for iteration 747 // For documentation, see the source code 748 UBool nextNormalize(); 749 UBool previousNormalize(); 750 751 void init(); 752 void clearBuffer(void); 753 754 //------------------------------------------------------------------------- 755 // Private data 756 //------------------------------------------------------------------------- 757 758 FilteredNormalizer2*fFilteredNorm2; // owned if not nullptr 759 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 760 UNormalizationMode fUMode; // deprecated 761 int32_t fOptions; 762 763 // The input text and our position in it 764 CharacterIterator *text; 765 766 // The normalization buffer is the result of normalization 767 // of the source in [currentIndex..nextIndex[ . 
768 int32_t currentIndex, nextIndex; 769 770 // A buffer for holding intermediate results 771 UnicodeString buffer; 772 int32_t bufferPos; 773 }; 774 775 //------------------------------------------------------------------------- 776 // Inline implementations 777 //------------------------------------------------------------------------- 778 779 #ifndef U_HIDE_DEPRECATED_API 780 inline bool 781 Normalizer::operator!= (const Normalizer& other) const 782 { return ! operator==(other); } 783 784 inline UNormalizationCheckResult 785 Normalizer::quickCheck(const UnicodeString& source, 786 UNormalizationMode mode, 787 UErrorCode &status) { 788 return quickCheck(source, mode, 0, status); 789 } 790 791 inline UBool 792 Normalizer::isNormalized(const UnicodeString& source, 793 UNormalizationMode mode, 794 UErrorCode &status) { 795 return isNormalized(source, mode, 0, status); 796 } 797 #endif /* U_HIDE_DEPRECATED_API */ 798 799 inline int32_t 800 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, 801 uint32_t options, 802 UErrorCode &errorCode) { 803 // all argument checking is done in unorm_compare 804 return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), 805 toUCharPtr(s2.getBuffer()), s2.length(), 806 options, 807 &errorCode); 808 } 809 810 U_NAMESPACE_END 811 812 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 813 814 #endif // NORMLZR_H 815 816 #endif /* U_SHOW_CPLUSPLUS_API */