 * "ca" -> the first key is key('c') and second key is key('a').
 * "cha" -> the first key is key('ch') and second key is key('a').
 * \htmlonly "æb" -> the first key is key('a'), the second key is key('e'), and
 * the third key is key('b'). \endhtmlonly
 * Example of the iterator usage: (without error checking)
 *
 * \code
 *   void CollationElementIterator_Example()
 *   {
 *       UnicodeString str = "This is a test";
 *       UErrorCode success = U_ZERO_ERROR;
 *       RuleBasedCollator* rbc =
 *           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
 *       CollationElementIterator* c =
 *           rbc->createCollationElementIterator( str );
 *       int32_t order = c->next(success);
 *       c->reset();
 *       order = c->previous(success);
 *       delete c;
 *       delete rbc;
 *   }
 * \endcode
 *
 * The method next() returns the collation order of the next character based on
 * the comparison level of the collator. The method previous() returns the
 * collation order of the previous character based on the comparison level of
 * the collator. The Collation Element Iterator moves only in one direction
 * between calls to reset(), setOffset(), or setText(). That is, calls to next()
 * and previous() cannot be intermixed. Whenever previous() is to be called after
 * next() or vice versa, reset(), setOffset() or setText() has to be called first
 * to reset the status, shifting pointers to either the end or the start of
 * the string (reset() or setText()), or the specified position (setOffset()).
 * Hence at the next call of next() or previous(), the first or last collation order,
 * or collation order at the specified position will be returned. If a change of
 * direction is done without one of these calls, the result is undefined.
 *
109 * The result of a forward iterate (next()) and reversed result of the backward 110 * iterate (previous()) on the same string are equivalent, if collation orders 111 * with the value 0 are ignored. 112 * Character based on the comparison level of the collator. A collation order 113 * consists of primary order, secondary order and tertiary order. The data 114 * type of the collation order is int32_t. 115 * 116 * Note, CollationElementIterator should not be subclassed. 117 * @see Collator 118 * @see RuleBasedCollator 119 * @version 1.8 Jan 16 2001 120 */ 121 class U_I18N_API CollationElementIterator final : public UObject { 122 public: 123 124 // CollationElementIterator public data member ------------------------------ 125 126 enum { 127 /** 128 * NULLORDER indicates that an error has occurred while processing 129 * @stable ICU 2.0 130 */ 131 NULLORDER = (int32_t)0xffffffff 132 }; 133 134 // CollationElementIterator public constructor/destructor ------------------- 135 136 /** 137 * Copy constructor. 138 * 139 * @param other the object to be copied from 140 * @stable ICU 2.0 141 */ 142 CollationElementIterator(const CollationElementIterator& other); 143 144 /** 145 * Destructor 146 * @stable ICU 2.0 147 */ 148 virtual ~CollationElementIterator(); 149 150 // CollationElementIterator public methods ---------------------------------- 151 152 /** 153 * Returns true if "other" is the same as "this" 154 * 155 * @param other the object to be compared 156 * @return true if "other" is the same as "this" 157 * @stable ICU 2.0 158 */ 159 bool operator==(const CollationElementIterator& other) const; 160 161 /** 162 * Returns true if "other" is not the same as "this". 163 * 164 * @param other the object to be compared 165 * @return true if "other" is not the same as "this" 166 * @stable ICU 2.0 167 */ 168 bool operator!=(const CollationElementIterator& other) const; 169 170 /** 171 * Resets the cursor to the beginning of the string. 
172 * @stable ICU 2.0 173 */ 174 void reset(void); 175 176 /** 177 * Gets the ordering priority of the next character in the string. 178 * @param status the error code status. 179 * @return the next character's ordering. otherwise returns NULLORDER if an 180 * error has occurred or if the end of string has been reached 181 * @stable ICU 2.0 182 */ 183 int32_t next(UErrorCode& status); 184 185 /** 186 * Get the ordering priority of the previous collation element in the string. 187 * @param status the error code status. 188 * @return the previous element's ordering. otherwise returns NULLORDER if an 189 * error has occurred or if the start of string has been reached 190 * @stable ICU 2.0 191 */ 192 int32_t previous(UErrorCode& status); 193 194 /** 195 * Gets the primary order of a collation order. 196 * @param order the collation order 197 * @return the primary order of a collation order. 198 * @stable ICU 2.0 199 */ 200 static inline int32_t primaryOrder(int32_t order); 201 202 /** 203 * Gets the secondary order of a collation order. 204 * @param order the collation order 205 * @return the secondary order of a collation order. 206 * @stable ICU 2.0 207 */ 208 static inline int32_t secondaryOrder(int32_t order); 209 210 /** 211 * Gets the tertiary order of a collation order. 212 * @param order the collation order 213 * @return the tertiary order of a collation order. 214 * @stable ICU 2.0 215 */ 216 static inline int32_t tertiaryOrder(int32_t order); 217 218 /** 219 * Return the maximum length of any expansion sequences that end with the 220 * specified comparison order. 221 * @param order a collation order returned by previous or next. 222 * @return maximum size of the expansion sequences ending with the collation 223 * element or 1 if collation element does not occur at the end of any 224 * expansion sequence 225 * @stable ICU 2.0 226 */ 227 int32_t getMaxExpansion(int32_t order) const; 228 229 /** 230 * Gets the comparison order in the desired strength. 
Ignore the other 231 * differences. 232 * @param order The order value 233 * @stable ICU 2.0 234 */ 235 int32_t strengthOrder(int32_t order) const; 236 237 /** 238 * Sets the source string. 239 * @param str the source string. 240 * @param status the error code status. 241 * @stable ICU 2.0 242 */ 243 void setText(const UnicodeString& str, UErrorCode& status); 244 245 /** 246 * Sets the source string. 247 * @param str the source character iterator. 248 * @param status the error code status. 249 * @stable ICU 2.0 250 */ 251 void setText(CharacterIterator& str, UErrorCode& status); 252 253 /** 254 * Checks if a comparison order is ignorable. 255 * @param order the collation order. 256 * @return true if a character is ignorable, false otherwise. 257 * @stable ICU 2.0 258 */ 259 static inline UBool isIgnorable(int32_t order); 260 261 /** 262 * Gets the offset of the currently processed character in the source string. 263 * @return the offset of the character. 264 * @stable ICU 2.0 265 */ 266 int32_t getOffset(void) const; 267 268 /** 269 * Sets the offset of the currently processed character in the source string. 270 * @param newOffset the new offset. 271 * @param status the error code status. 272 * @return the offset of the character. 273 * @stable ICU 2.0 274 */ 275 void setOffset(int32_t newOffset, UErrorCode& status); 276 277 /** 278 * ICU "poor man's RTTI", returns a UClassID for the actual class. 279 * 280 * @stable ICU 2.2 281 */ 282 virtual UClassID getDynamicClassID() const override; 283 284 /** 285 * ICU "poor man's RTTI", returns a UClassID for this class. 
286 * 287 * @stable ICU 2.2 288 */ 289 static UClassID U_EXPORT2 getStaticClassID(); 290 291 #ifndef U_HIDE_INTERNAL_API 292 /** @internal */ 293 static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { 294 return reinterpret_cast(uc); 295 } 296 /** @internal */ 297 static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { 298 return reinterpret_cast(uc); 299 } 300 /** @internal */ 301 inline UCollationElements *toUCollationElements() { 302 return reinterpret_cast(this); 303 } 304 /** @internal */ 305 inline const UCollationElements *toUCollationElements() const { 306 return reinterpret_cast(this); 307 } 308 #endif // U_HIDE_INTERNAL_API 309 310 private: 311 friend class RuleBasedCollator; 312 friend class UCollationPCE; 313 314 /** 315 * CollationElementIterator constructor. This takes the source string and the 316 * collation object. The cursor will walk thru the source string based on the 317 * predefined collation rules. If the source string is empty, NULLORDER will 318 * be returned on the calls to next(). 319 * @param sourceText the source string. 320 * @param order the collation object. 321 * @param status the error code status. 322 */ 323 CollationElementIterator(const UnicodeString& sourceText, 324 const RuleBasedCollator* order, UErrorCode& status); 325 // Note: The constructors should take settings & tailoring, not a collator, 326 // to avoid circular dependencies. 327 // However, for operator==() we would need to be able to compare tailoring data for equality 328 // without making CollationData or CollationTailoring depend on TailoredSet. 329 // (See the implementation of RuleBasedCollator::operator==().) 330 // That might require creating an intermediate class that would be used 331 // by both CollationElementIterator and RuleBasedCollator 332 // but only contain the part of RBC== related to data and rules. 333 334 /** 335 * CollationElementIterator constructor. 
This takes the source string and the 336 * collation object. The cursor will walk thru the source string based on the 337 * predefined collation rules. If the source string is empty, NULLORDER will 338 * be returned on the calls to next(). 339 * @param sourceText the source string. 340 * @param order the collation object. 341 * @param status the error code status. 342 */ 343 CollationElementIterator(const CharacterIterator& sourceText, 344 const RuleBasedCollator* order, UErrorCode& status); 345 346 /** 347 * Assignment operator 348 * 349 * @param other the object to be copied 350 */ 351 const CollationElementIterator& 352 operator=(const CollationElementIterator& other); 353 354 CollationElementIterator() = delete; // default constructor not implemented 355 356 /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */ 357 inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } 358 359 static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); 360 361 static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); 362 363 // CollationElementIterator private data members ---------------------------- 364 365 CollationIterator *iter_; // owned 366 const RuleBasedCollator *rbc_; // aliased 367 uint32_t otherHalf_; 368 /** 369 * <0: backwards; 0: just after reset() (previous() begins from end); 370 * 1: just after setOffset(); >1: forward 371 */ 372 int8_t dir_; 373 /** 374 * Stores offsets from expansions and from unsafe-backwards iteration, 375 * so that getOffset() returns intermediate offsets for the CEs 376 * that are consistent with forward iteration. 
377 */ 378 UVector32 *offsets_; 379 380 UnicodeString string_; 381 }; 382 383 // CollationElementIterator inline method definitions -------------------------- 384 385 inline int32_t CollationElementIterator::primaryOrder(int32_t order) 386 { 387 return (order >> 16) & 0xffff; 388 } 389 390 inline int32_t CollationElementIterator::secondaryOrder(int32_t order) 391 { 392 return (order >> 8) & 0xff; 393 } 394 395 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) 396 { 397 return order & 0xff; 398 } 399 400 inline UBool CollationElementIterator::isIgnorable(int32_t order) 401 { 402 return (order & 0xffff0000) == 0; 403 } 404 405 U_NAMESPACE_END 406 407 #endif /* #if !UCONFIG_NO_COLLATION */ 408 409 #endif /* U_SHOW_CPLUSPLUS_API */ 410 411 #endif