Where Online Learning is simpler!
The C and C++ Include Header Files
/usr/include/python3.12/unicodeobject.h
$ cat -n /usr/include/python3.12/unicodeobject.h 1 #ifndef Py_UNICODEOBJECT_H 2 #define Py_UNICODEOBJECT_H 3 4 #include <stdarg.h>
// va_list 5 6 /* 7 8 Unicode implementation based on original code by Fredrik Lundh, 9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the 10 Unicode Integration Proposal. (See 11 http://www.egenix.com/files/python/unicode-proposal.txt). 12 13 Copyright (c) Corporation for National Research Initiatives. 14 15 16 Original header: 17 -------------------------------------------------------------------- 18 19 * Yet another Unicode string type for Python. This type supports the 20 * 16-bit Basic Multilingual Plane (BMP) only. 21 * 22 * Written by Fredrik Lundh, January 1999. 23 * 24 * Copyright (c) 1999 by Secret Labs AB. 25 * Copyright (c) 1999 by Fredrik Lundh. 26 * 27 * fredrik@pythonware.com 28 * http://www.pythonware.com 29 * 30 * -------------------------------------------------------------------- 31 * This Unicode String Type is 32 * 33 * Copyright (c) 1999 by Secret Labs AB 34 * Copyright (c) 1999 by Fredrik Lundh 35 * 36 * By obtaining, using, and/or copying this software and/or its 37 * associated documentation, you agree that you have read, understood, 38 * and will comply with the following terms and conditions: 39 * 40 * Permission to use, copy, modify, and distribute this software and its 41 * associated documentation for any purpose and without fee is hereby 42 * granted, provided that the above copyright notice appears in all 43 * copies, and that both that copyright notice and this permission notice 44 * appear in supporting documentation, and that the name of Secret Labs 45 * AB or the author not be used in advertising or publicity pertaining to 46 * distribution of the software without specific, written prior 47 * permission. 48 * 49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO 50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 51 * FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR 52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 56 * -------------------------------------------------------------------- */ 57 58 #include <ctype.h>
59 60 /* === Internal API ======================================================= */ 61 62 /* --- Internal Unicode Format -------------------------------------------- */ 63 64 /* Python 3.x requires unicode */ 65 #define Py_USING_UNICODE 66 67 #ifndef SIZEOF_WCHAR_T 68 #error Must define SIZEOF_WCHAR_T 69 #endif 70 71 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T 72 73 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. 74 Otherwise, Unicode strings are stored as UCS-2 (with limited support 75 for UTF-16) */ 76 77 #if Py_UNICODE_SIZE >= 4 78 #define Py_UNICODE_WIDE 79 #endif 80 81 /* Set these flags if the platform has "wchar.h" and the 82 wchar_t type is a 16-bit unsigned type */ 83 /* #define HAVE_WCHAR_H */ 84 /* #define HAVE_USABLE_WCHAR_T */ 85 86 /* If the compiler provides a wchar_t type we try to support it 87 through the interface functions PyUnicode_FromWideChar(), 88 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ 89 90 #ifdef HAVE_USABLE_WCHAR_T 91 # ifndef HAVE_WCHAR_H 92 # define HAVE_WCHAR_H 93 # endif 94 #endif 95 96 #ifdef HAVE_WCHAR_H 97 # include <wchar.h>
98 #endif 99 100 /* Py_UCS4 and Py_UCS2 are typedefs for the respective 101 unicode representations. */ 102 typedef uint32_t Py_UCS4; 103 typedef uint16_t Py_UCS2; 104 typedef uint8_t Py_UCS1; 105 106 #ifdef __cplusplus 107 extern "C" { 108 #endif 109 110 111 PyAPI_DATA(PyTypeObject) PyUnicode_Type; 112 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; 113 114 #define PyUnicode_Check(op) \ 115 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) 116 #define PyUnicode_CheckExact(op) Py_IS_TYPE((op), &PyUnicode_Type) 117 118 /* --- Constants ---------------------------------------------------------- */ 119 120 /* This Unicode character will be used as replacement character during 121 decoding if the errors argument is set to "replace". Note: the 122 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in 123 Unicode 3.0. */ 124 125 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) 126 127 /* === Public API ========================================================= */ 128 129 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ 130 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( 131 const char *u, /* UTF-8 encoded string */ 132 Py_ssize_t size /* size of buffer */ 133 ); 134 135 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated 136 UTF-8 encoded bytes. The size is determined with strlen(). */ 137 PyAPI_FUNC(PyObject*) PyUnicode_FromString( 138 const char *u /* UTF-8 encoded string */ 139 ); 140 141 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 142 PyAPI_FUNC(PyObject*) PyUnicode_Substring( 143 PyObject *str, 144 Py_ssize_t start, 145 Py_ssize_t end); 146 #endif 147 148 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 149 /* Copy the string into a UCS4 buffer including the null character if copy_null 150 is set. Return NULL and raise an exception on error. Raise a SystemError if 151 the buffer is smaller than the string. Return buffer on success. 
152 153 buflen is the length of the buffer in (Py_UCS4) characters. */ 154 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( 155 PyObject *unicode, 156 Py_UCS4* buffer, 157 Py_ssize_t buflen, 158 int copy_null); 159 160 /* Copy the string into a UCS4 buffer. A new buffer is allocated using 161 * PyMem_Malloc; if this fails, NULL is returned with a memory error 162 exception set. */ 163 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); 164 #endif 165 166 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 167 /* Get the length of the Unicode object. */ 168 169 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( 170 PyObject *unicode 171 ); 172 #endif 173 174 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 175 /* Read a character from the string. */ 176 177 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( 178 PyObject *unicode, 179 Py_ssize_t index 180 ); 181 182 /* Write a character to the string. The string must have been created through 183 PyUnicode_New, must not be shared, and must not have been hashed yet. 184 185 Return 0 on success, -1 on error. */ 186 187 PyAPI_FUNC(int) PyUnicode_WriteChar( 188 PyObject *unicode, 189 Py_ssize_t index, 190 Py_UCS4 character 191 ); 192 #endif 193 194 /* Resize a Unicode object. The length is the number of codepoints. 195 196 *unicode is modified to point to the new (resized) object and 0 197 returned on success. 198 199 Try to resize the string in place (which is usually faster than allocating 200 a new string and copy characters), or create a new string. 201 202 Error handling is implemented as follows: an exception is set, -1 203 is returned and *unicode left untouched. 204 205 WARNING: The function doesn't check string content, the result may not be a 206 string in canonical representation. */ 207 208 PyAPI_FUNC(int) PyUnicode_Resize( 209 PyObject **unicode, /* Pointer to the Unicode object */ 210 Py_ssize_t length /* New length */ 211 ); 212 213 /* Decode obj to a Unicode object. 
214 215 bytes, bytearray and other bytes-like objects are decoded according to the 216 given encoding and error handler. The encoding and error handler can be 217 NULL to have the interface use UTF-8 and "strict". 218 219 All other objects (including Unicode objects) raise an exception. 220 221 The API returns NULL in case of an error. The caller is responsible 222 for decref'ing the returned objects. 223 224 */ 225 226 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( 227 PyObject *obj, /* Object */ 228 const char *encoding, /* encoding */ 229 const char *errors /* error handling */ 230 ); 231 232 /* Copy an instance of a Unicode subtype to a new true Unicode object if 233 necessary. If obj is already a true Unicode object (not a subtype), return 234 the reference with *incremented* refcount. 235 236 The API returns NULL in case of an error. The caller is responsible 237 for decref'ing the returned objects. 238 239 */ 240 241 PyAPI_FUNC(PyObject*) PyUnicode_FromObject( 242 PyObject *obj /* Object */ 243 ); 244 245 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( 246 const char *format, /* ASCII-encoded string */ 247 va_list vargs 248 ); 249 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( 250 const char *format, /* ASCII-encoded string */ 251 ... 252 ); 253 254 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); 255 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( 256 const char *u /* UTF-8 encoded string */ 257 ); 258 259 /* --- wchar_t support for platforms which support it --------------------- */ 260 261 #ifdef HAVE_WCHAR_H 262 263 /* Create a Unicode Object from the wchar_t buffer w of the given 264 size. 265 266 The buffer is copied into the new object. */ 267 268 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( 269 const wchar_t *w, /* wchar_t buffer */ 270 Py_ssize_t size /* size of buffer */ 271 ); 272 273 /* Copies the Unicode Object contents into the wchar_t buffer w. At 274 most size wchar_t characters are copied. 
275 276 Note that the resulting wchar_t string may or may not be 277 0-terminated. It is the responsibility of the caller to make sure 278 that the wchar_t string is 0-terminated in case this is required by 279 the application. 280 281 Returns the number of wchar_t characters copied (excluding a 282 possibly trailing 0-termination character) or -1 in case of an 283 error. */ 284 285 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( 286 PyObject *unicode, /* Unicode object */ 287 wchar_t *w, /* wchar_t buffer */ 288 Py_ssize_t size /* size of buffer */ 289 ); 290 291 /* Convert the Unicode object to a wide character string. The output string 292 always ends with a nul character. If size is not NULL, write the number of 293 wide characters (excluding the null character) into *size. 294 295 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it) 296 on success. On error, returns NULL, *size is undefined and raises a 297 MemoryError. */ 298 299 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( 300 PyObject *unicode, /* Unicode object */ 301 Py_ssize_t *size /* number of characters of the result */ 302 ); 303 304 #endif 305 306 /* --- Unicode ordinals --------------------------------------------------- */ 307 308 /* Create a Unicode Object from the given Unicode code point ordinal. 309 310 The ordinal must be in range(0x110000). A ValueError is 311 raised in case it is not. 312 313 */ 314 315 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); 316 317 /* === Builtin Codecs ===================================================== 318 319 Many of these APIs take two arguments encoding and errors. These 320 parameters encoding and errors have the same semantics as the ones 321 of the builtin str() API. 322 323 Setting encoding to NULL causes the default encoding (UTF-8) to be used. 324 325 Error handling is set by errors which may also be set to NULL 326 meaning to use the default handling defined for the codec. 
Default 327 error handling for all builtin codecs is "strict" (ValueErrors are 328 raised). 329 330 The codecs all use a similar interface. Only deviation from the 331 generic ones are documented. 332 333 */ 334 335 /* --- Manage the default encoding ---------------------------------------- */ 336 337 /* Returns "utf-8". */ 338 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); 339 340 /* --- Generic Codecs ----------------------------------------------------- */ 341 342 /* Create a Unicode object by decoding the encoded string s of the 343 given size. */ 344 345 PyAPI_FUNC(PyObject*) PyUnicode_Decode( 346 const char *s, /* encoded string */ 347 Py_ssize_t size, /* size of buffer */ 348 const char *encoding, /* encoding */ 349 const char *errors /* error handling */ 350 ); 351 352 /* Decode a Unicode object unicode and return the result as Python 353 object. 354 355 This API is DEPRECATED. The only supported standard encoding is rot13. 356 Use PyCodec_Decode() to decode with rot13 and non-standard codecs 357 that decode from str. */ 358 359 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( 360 PyObject *unicode, /* Unicode object */ 361 const char *encoding, /* encoding */ 362 const char *errors /* error handling */ 363 ); 364 365 /* Decode a Unicode object unicode and return the result as Unicode 366 object. 367 368 This API is DEPRECATED. The only supported standard encoding is rot13. 369 Use PyCodec_Decode() to decode with rot13 and non-standard codecs 370 that decode from str to str. */ 371 372 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( 373 PyObject *unicode, /* Unicode object */ 374 const char *encoding, /* encoding */ 375 const char *errors /* error handling */ 376 ); 377 378 /* Encodes a Unicode object and returns the result as Python 379 object. 380 381 This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString() 382 since all standard encodings (except rot13) encode str to bytes. 
383 Use PyCodec_Encode() for encoding with rot13 and non-standard codecs 384 that encode from str to non-bytes. */ 385 386 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( 387 PyObject *unicode, /* Unicode object */ 388 const char *encoding, /* encoding */ 389 const char *errors /* error handling */ 390 ); 391 392 /* Encodes a Unicode object and returns the result as Python string 393 object. */ 394 395 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( 396 PyObject *unicode, /* Unicode object */ 397 const char *encoding, /* encoding */ 398 const char *errors /* error handling */ 399 ); 400 401 /* Encodes a Unicode object and returns the result as Unicode 402 object. 403 404 This API is DEPRECATED. The only supported standard encoding is rot13. 405 Use PyCodec_Encode() to encode with rot13 and non-standard codecs 406 that encode from str to str. */ 407 408 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( 409 PyObject *unicode, /* Unicode object */ 410 const char *encoding, /* encoding */ 411 const char *errors /* error handling */ 412 ); 413 414 /* Build an encoding map. 
*/ 415 416 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( 417 PyObject* string /* 256 character map */ 418 ); 419 420 /* --- UTF-7 Codecs ------------------------------------------------------- */ 421 422 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( 423 const char *string, /* UTF-7 encoded string */ 424 Py_ssize_t length, /* size of string */ 425 const char *errors /* error handling */ 426 ); 427 428 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( 429 const char *string, /* UTF-7 encoded string */ 430 Py_ssize_t length, /* size of string */ 431 const char *errors, /* error handling */ 432 Py_ssize_t *consumed /* bytes consumed */ 433 ); 434 435 /* --- UTF-8 Codecs ------------------------------------------------------- */ 436 437 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( 438 const char *string, /* UTF-8 encoded string */ 439 Py_ssize_t length, /* size of string */ 440 const char *errors /* error handling */ 441 ); 442 443 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( 444 const char *string, /* UTF-8 encoded string */ 445 Py_ssize_t length, /* size of string */ 446 const char *errors, /* error handling */ 447 Py_ssize_t *consumed /* bytes consumed */ 448 ); 449 450 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( 451 PyObject *unicode /* Unicode object */ 452 ); 453 454 /* Returns a pointer to the default encoding (UTF-8) of the 455 Unicode object unicode and the size of the encoded representation 456 in bytes stored in *size. 457 458 In case of an error, no *size is set. 459 460 This function caches the UTF-8 encoded string in the unicodeobject 461 and subsequent calls will return the same string. The memory is released 462 when the unicodeobject is deallocated. 
463 */ 464 465 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 466 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize( 467 PyObject *unicode, 468 Py_ssize_t *size); 469 #endif 470 471 /* --- UTF-32 Codecs ------------------------------------------------------ */ 472 473 /* Decodes length bytes from a UTF-32 encoded buffer string and returns 474 the corresponding Unicode object. 475 476 errors (if non-NULL) defines the error handling. It defaults 477 to "strict". 478 479 If byteorder is non-NULL, the decoder starts decoding using the 480 given byte order: 481 482 *byteorder == -1: little endian 483 *byteorder == 0: native order 484 *byteorder == 1: big endian 485 486 In native mode, the first four bytes of the stream are checked for a 487 BOM mark. If found, the BOM mark is analysed, the byte order 488 adjusted and the BOM skipped. In the other modes, no BOM mark 489 interpretation is done. After completion, *byteorder is set to the 490 current byte order at the end of input data. 491 492 If byteorder is NULL, the codec starts in native order mode. 493 494 */ 495 496 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( 497 const char *string, /* UTF-32 encoded string */ 498 Py_ssize_t length, /* size of string */ 499 const char *errors, /* error handling */ 500 int *byteorder /* pointer to byteorder to use 501 0=native;-1=LE,1=BE; updated on 502 exit */ 503 ); 504 505 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( 506 const char *string, /* UTF-32 encoded string */ 507 Py_ssize_t length, /* size of string */ 508 const char *errors, /* error handling */ 509 int *byteorder, /* pointer to byteorder to use 510 0=native;-1=LE,1=BE; updated on 511 exit */ 512 Py_ssize_t *consumed /* bytes consumed */ 513 ); 514 515 /* Returns a Python string using the UTF-32 encoding in native byte 516 order. The string always starts with a BOM mark. 
*/ 517 518 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( 519 PyObject *unicode /* Unicode object */ 520 ); 521 522 /* Returns a Python string object holding the UTF-32 encoded value of 523 the Unicode data. 524 525 If byteorder is not 0, output is written according to the following 526 byte order: 527 528 byteorder == -1: little endian 529 byteorder == 0: native byte order (writes a BOM mark) 530 byteorder == 1: big endian 531 532 If byteorder is 0, the output string will always start with the 533 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 534 prepended. 535 536 */ 537 538 /* --- UTF-16 Codecs ------------------------------------------------------ */ 539 540 /* Decodes length bytes from a UTF-16 encoded buffer string and returns 541 the corresponding Unicode object. 542 543 errors (if non-NULL) defines the error handling. It defaults 544 to "strict". 545 546 If byteorder is non-NULL, the decoder starts decoding using the 547 given byte order: 548 549 *byteorder == -1: little endian 550 *byteorder == 0: native order 551 *byteorder == 1: big endian 552 553 In native mode, the first two bytes of the stream are checked for a 554 BOM mark. If found, the BOM mark is analysed, the byte order 555 adjusted and the BOM skipped. In the other modes, no BOM mark 556 interpretation is done. After completion, *byteorder is set to the 557 current byte order at the end of input data. 558 559 If byteorder is NULL, the codec starts in native order mode. 
560 561 */ 562 563 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( 564 const char *string, /* UTF-16 encoded string */ 565 Py_ssize_t length, /* size of string */ 566 const char *errors, /* error handling */ 567 int *byteorder /* pointer to byteorder to use 568 0=native;-1=LE,1=BE; updated on 569 exit */ 570 ); 571 572 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( 573 const char *string, /* UTF-16 encoded string */ 574 Py_ssize_t length, /* size of string */ 575 const char *errors, /* error handling */ 576 int *byteorder, /* pointer to byteorder to use 577 0=native;-1=LE,1=BE; updated on 578 exit */ 579 Py_ssize_t *consumed /* bytes consumed */ 580 ); 581 582 /* Returns a Python string using the UTF-16 encoding in native byte 583 order. The string always starts with a BOM mark. */ 584 585 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( 586 PyObject *unicode /* Unicode object */ 587 ); 588 589 /* --- Unicode-Escape Codecs ---------------------------------------------- */ 590 591 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( 592 const char *string, /* Unicode-Escape encoded string */ 593 Py_ssize_t length, /* size of string */ 594 const char *errors /* error handling */ 595 ); 596 597 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( 598 PyObject *unicode /* Unicode object */ 599 ); 600 601 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ 602 603 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( 604 const char *string, /* Raw-Unicode-Escape encoded string */ 605 Py_ssize_t length, /* size of string */ 606 const char *errors /* error handling */ 607 ); 608 609 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( 610 PyObject *unicode /* Unicode object */ 611 ); 612 613 /* --- Latin-1 Codecs ----------------------------------------------------- 614 615 Note: Latin-1 corresponds to the first 256 Unicode ordinals. 
*/ 616 617 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( 618 const char *string, /* Latin-1 encoded string */ 619 Py_ssize_t length, /* size of string */ 620 const char *errors /* error handling */ 621 ); 622 623 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( 624 PyObject *unicode /* Unicode object */ 625 ); 626 627 /* --- ASCII Codecs ------------------------------------------------------- 628 629 Only 7-bit ASCII data is expected. All other codes generate errors. 630 631 */ 632 633 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( 634 const char *string, /* ASCII encoded string */ 635 Py_ssize_t length, /* size of string */ 636 const char *errors /* error handling */ 637 ); 638 639 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( 640 PyObject *unicode /* Unicode object */ 641 ); 642 643 /* --- Character Map Codecs ----------------------------------------------- 644 645 This codec uses mappings to encode and decode characters. 646 647 Decoding mappings must map byte ordinals (integers in the range from 0 to 648 255) to Unicode strings, integers (which are then interpreted as Unicode 649 ordinals) or None. Unmapped data bytes (ones which cause a LookupError) 650 as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined 651 mapping" and cause an error. 652 653 Encoding mappings must map Unicode ordinal integers to bytes objects, 654 integers in the range from 0 to 255 or None. Unmapped character 655 ordinals (ones which cause a LookupError) as well as mapped to 656 None are treated as "undefined mapping" and cause an error. 
657 658 */ 659 660 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( 661 const char *string, /* Encoded string */ 662 Py_ssize_t length, /* size of string */ 663 PyObject *mapping, /* decoding mapping */ 664 const char *errors /* error handling */ 665 ); 666 667 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( 668 PyObject *unicode, /* Unicode object */ 669 PyObject *mapping /* encoding mapping */ 670 ); 671 672 /* --- MBCS codecs for Windows -------------------------------------------- */ 673 674 #ifdef MS_WINDOWS 675 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( 676 const char *string, /* MBCS encoded string */ 677 Py_ssize_t length, /* size of string */ 678 const char *errors /* error handling */ 679 ); 680 681 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( 682 const char *string, /* MBCS encoded string */ 683 Py_ssize_t length, /* size of string */ 684 const char *errors, /* error handling */ 685 Py_ssize_t *consumed /* bytes consumed */ 686 ); 687 688 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 689 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( 690 int code_page, /* code page number */ 691 const char *string, /* encoded string */ 692 Py_ssize_t length, /* size of string */ 693 const char *errors, /* error handling */ 694 Py_ssize_t *consumed /* bytes consumed */ 695 ); 696 #endif 697 698 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( 699 PyObject *unicode /* Unicode object */ 700 ); 701 702 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 703 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( 704 int code_page, /* code page number */ 705 PyObject *unicode, /* Unicode object */ 706 const char *errors /* error handling */ 707 ); 708 #endif 709 710 #endif /* MS_WINDOWS */ 711 712 /* --- Locale encoding --------------------------------------------------- */ 713 714 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 715 /* Decode a string from the current locale encoding. 
The decoder is strict if 716 *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape' 717 error handler (PEP 383) to escape undecodable bytes. If a byte sequence can 718 be decoded as a surrogate character and *surrogateescape* is not equal to 719 zero, the byte sequence is escaped using the 'surrogateescape' error handler 720 instead of being decoded. *str* must end with a null character but cannot 721 contain embedded null characters. */ 722 723 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( 724 const char *str, 725 Py_ssize_t len, 726 const char *errors); 727 728 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string 729 length using strlen(). */ 730 731 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( 732 const char *str, 733 const char *errors); 734 735 /* Encode a Unicode object to the current locale encoding. The encoder is 736 strict if *surrogateescape* is equal to zero, otherwise the 737 "surrogateescape" error handler is used. Return a bytes object. The string 738 cannot contain embedded null characters. */ 739 740 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( 741 PyObject *unicode, 742 const char *errors 743 ); 744 #endif 745 746 /* --- File system encoding ---------------------------------------------- */ 747 748 /* ParseTuple converter: encode str objects to bytes using 749 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ 750 751 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); 752 753 /* ParseTuple converter: decode bytes objects to unicode using 754 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ 755 756 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); 757 758 /* Decode a null-terminated string from the Python filesystem encoding 759 and error handler. 760 761 If the string length is known, use PyUnicode_DecodeFSDefaultAndSize(). 
*/ 762 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( 763 const char *s /* encoded string */ 764 ); 765 766 /* Decode a string from the Python filesystem encoding and error handler. */ 767 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( 768 const char *s, /* encoded string */ 769 Py_ssize_t size /* size */ 770 ); 771 772 /* Encode a Unicode object to the Python filesystem encoding and error handler. 773 Return bytes. */ 774 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( 775 PyObject *unicode 776 ); 777 778 /* --- Methods & Slots ---------------------------------------------------- 779 780 These are capable of handling Unicode objects and strings on input 781 (we refer to them as strings in the descriptions) and return 782 Unicode objects or integers as appropriate. */ 783 784 /* Concat two strings giving a new Unicode string. */ 785 786 PyAPI_FUNC(PyObject*) PyUnicode_Concat( 787 PyObject *left, /* Left string */ 788 PyObject *right /* Right string */ 789 ); 790 791 /* Concat two strings and put the result in *pleft 792 (sets *pleft to NULL on error) */ 793 794 PyAPI_FUNC(void) PyUnicode_Append( 795 PyObject **pleft, /* Pointer to left string */ 796 PyObject *right /* Right string */ 797 ); 798 799 /* Concat two strings, put the result in *pleft and drop the right object 800 (sets *pleft to NULL on error) */ 801 802 PyAPI_FUNC(void) PyUnicode_AppendAndDel( 803 PyObject **pleft, /* Pointer to left string */ 804 PyObject *right /* Right string */ 805 ); 806 807 /* Split a string giving a list of Unicode strings. 808 809 If sep is NULL, splitting will be done at all whitespace 810 substrings. Otherwise, splits occur at the given separator. 811 812 At most maxsplit splits will be done. If negative, no limit is set. 813 814 Separators are not included in the resulting list. 
815 816 */ 817 818 PyAPI_FUNC(PyObject*) PyUnicode_Split( 819 PyObject *s, /* String to split */ 820 PyObject *sep, /* String separator */ 821 Py_ssize_t maxsplit /* Maxsplit count */ 822 ); 823 824 /* Ditto, but split at line breaks. 825 826 CRLF is considered to be one line break. Line breaks are not 827 included in the resulting list. */ 828 829 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( 830 PyObject *s, /* String to split */ 831 int keepends /* If true, line end markers are included */ 832 ); 833 834 /* Partition a string using a given separator. */ 835 836 PyAPI_FUNC(PyObject*) PyUnicode_Partition( 837 PyObject *s, /* String to partition */ 838 PyObject *sep /* String separator */ 839 ); 840 841 /* Partition a string using a given separator, searching from the end of the 842 string. */ 843 844 PyAPI_FUNC(PyObject*) PyUnicode_RPartition( 845 PyObject *s, /* String to partition */ 846 PyObject *sep /* String separator */ 847 ); 848 849 /* Split a string giving a list of Unicode strings. 850 851 If sep is NULL, splitting will be done at all whitespace 852 substrings. Otherwise, splits occur at the given separator. 853 854 At most maxsplit splits will be done. But unlike PyUnicode_Split 855 PyUnicode_RSplit splits from the end of the string. If negative, 856 no limit is set. 857 858 Separators are not included in the resulting list. 859 860 */ 861 862 PyAPI_FUNC(PyObject*) PyUnicode_RSplit( 863 PyObject *s, /* String to split */ 864 PyObject *sep, /* String separator */ 865 Py_ssize_t maxsplit /* Maxsplit count */ 866 ); 867 868 /* Translate a string by applying a character mapping table to it and 869 return the resulting Unicode object. 870 871 The mapping table must map Unicode ordinal integers to Unicode strings, 872 Unicode ordinal integers or None (causing deletion of the character). 873 874 Mapping tables may be dictionaries or sequences. Unmapped character 875 ordinals (ones which cause a LookupError) are left untouched and 876 are copied as-is. 
877 878 */ 879 880 PyAPI_FUNC(PyObject *) PyUnicode_Translate( 881 PyObject *str, /* String */ 882 PyObject *table, /* Translate table */ 883 const char *errors /* error handling */ 884 ); 885 886 /* Join a sequence of strings using the given separator and return 887 the resulting Unicode string. */ 888 889 PyAPI_FUNC(PyObject*) PyUnicode_Join( 890 PyObject *separator, /* Separator string */ 891 PyObject *seq /* Sequence object */ 892 ); 893 894 /* Return 1 if substr matches str[start:end] at the given tail end, 0 895 otherwise. */ 896 897 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( 898 PyObject *str, /* String */ 899 PyObject *substr, /* Prefix or Suffix string */ 900 Py_ssize_t start, /* Start index */ 901 Py_ssize_t end, /* Stop index */ 902 int direction /* Tail end: -1 prefix, +1 suffix */ 903 ); 904 905 /* Return the first position of substr in str[start:end] using the 906 given search direction or -1 if not found. -2 is returned in case 907 an error occurred and an exception is set. */ 908 909 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( 910 PyObject *str, /* String */ 911 PyObject *substr, /* Substring to find */ 912 Py_ssize_t start, /* Start index */ 913 Py_ssize_t end, /* Stop index */ 914 int direction /* Find direction: +1 forward, -1 backward */ 915 ); 916 917 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 918 /* Like PyUnicode_Find, but search for single character only. */ 919 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( 920 PyObject *str, 921 Py_UCS4 ch, 922 Py_ssize_t start, 923 Py_ssize_t end, 924 int direction 925 ); 926 #endif 927 928 /* Count the number of occurrences of substr in str[start:end]. */ 929 930 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( 931 PyObject *str, /* String */ 932 PyObject *substr, /* Substring to count */ 933 Py_ssize_t start, /* Start index */ 934 Py_ssize_t end /* Stop index */ 935 ); 936 937 /* Replace at most maxcount occurrences of substr in str with replstr 938 and return the resulting Unicode object. 
*/ 939 940 PyAPI_FUNC(PyObject *) PyUnicode_Replace( 941 PyObject *str, /* String */ 942 PyObject *substr, /* Substring to find */ 943 PyObject *replstr, /* Substring to replace */ 944 Py_ssize_t maxcount /* Max. number of replacements to apply; 945 -1 = all */ 946 ); 947 948 /* Compare two strings and return -1, 0, 1 for less than, equal, 949 greater than resp. 950 Raise an exception and return -1 on error. */ 951 952 PyAPI_FUNC(int) PyUnicode_Compare( 953 PyObject *left, /* Left string */ 954 PyObject *right /* Right string */ 955 ); 956 957 /* Compare a Unicode object with C string and return -1, 0, 1 for less than, 958 equal, and greater than, respectively. It is best to pass only 959 ASCII-encoded strings, but the function interprets the input string as 960 ISO-8859-1 if it contains non-ASCII characters. 961 This function does not raise exceptions. */ 962 963 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( 964 PyObject *left, 965 const char *right /* ASCII-encoded string */ 966 ); 967 968 /* Rich compare two strings and return one of the following: 969 970 - NULL in case an exception was raised 971 - Py_True or Py_False for successful comparisons 972 - Py_NotImplemented in case the type combination is unknown 973 974 Possible values for op: 975 976 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 977 978 */ 979 980 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( 981 PyObject *left, /* Left string */ 982 PyObject *right, /* Right string */ 983 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ 984 ); 985 986 /* Apply an argument tuple or dictionary to a format string and return 987 the resulting Unicode string. */ 988 989 PyAPI_FUNC(PyObject *) PyUnicode_Format( 990 PyObject *format, /* Format string */ 991 PyObject *args /* Argument tuple or dictionary */ 992 ); 993 994 /* Checks whether element is contained in container and return 1/0 995 accordingly. 996 997 element has to coerce to a one element Unicode string. -1 is 998 returned in case of an error. 
*/ 999 1000 PyAPI_FUNC(int) PyUnicode_Contains( 1001 PyObject *container, /* Container string */ 1002 PyObject *element /* Element string */ 1003 ); 1004 1005 /* Checks whether argument is a valid identifier. */ 1006 1007 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); 1008 1009 /* === Characters Type APIs =============================================== */ 1010 1011 #ifndef Py_LIMITED_API 1012 # define Py_CPYTHON_UNICODEOBJECT_H 1013 # include "cpython/unicodeobject.h" 1014 # undef Py_CPYTHON_UNICODEOBJECT_H 1015 #endif 1016 1017 #ifdef __cplusplus 1018 } 1019 #endif 1020 #endif /* !Py_UNICODEOBJECT_H */
Contact us
|
About us
|
Terms of use
|
Copyright © 2000-2025 MyWebUniversity.com ™