Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/python3.12/unicodeobject.h


$ cat -n /usr/include/python3.12/unicodeobject.h

     1	#ifndef Py_UNICODEOBJECT_H
     2	#define Py_UNICODEOBJECT_H
     3	
     4	#include <stdarg.h>               // va_list
     5	
     6	/*
     7	
     8	Unicode implementation based on original code by Fredrik Lundh,
     9	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
    10	Unicode Integration Proposal. (See
    11	http://www.egenix.com/files/python/unicode-proposal.txt).
    12	
    13	Copyright (c) Corporation for National Research Initiatives.
    14	
    15	
    16	 Original header:
    17	 --------------------------------------------------------------------
    18	
    19	 * Yet another Unicode string type for Python.  This type supports the
    20	 * 16-bit Basic Multilingual Plane (BMP) only.
    21	 *
    22	 * Written by Fredrik Lundh, January 1999.
    23	 *
    24	 * Copyright (c) 1999 by Secret Labs AB.
    25	 * Copyright (c) 1999 by Fredrik Lundh.
    26	 *
    27	 * fredrik@pythonware.com
    28	 * http://www.pythonware.com
    29	 *
    30	 * --------------------------------------------------------------------
    31	 * This Unicode String Type is
    32	 *
    33	 * Copyright (c) 1999 by Secret Labs AB
    34	 * Copyright (c) 1999 by Fredrik Lundh
    35	 *
    36	 * By obtaining, using, and/or copying this software and/or its
    37	 * associated documentation, you agree that you have read, understood,
    38	 * and will comply with the following terms and conditions:
    39	 *
    40	 * Permission to use, copy, modify, and distribute this software and its
    41	 * associated documentation for any purpose and without fee is hereby
    42	 * granted, provided that the above copyright notice appears in all
    43	 * copies, and that both that copyright notice and this permission notice
    44	 * appear in supporting documentation, and that the name of Secret Labs
    45	 * AB or the author not be used in advertising or publicity pertaining to
    46	 * distribution of the software without specific, written prior
    47	 * permission.
    48	 *
    49	 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
    50	 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    51	 * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
    52	 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    53	 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    54	 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
    55	 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    56	 * -------------------------------------------------------------------- */
    57	
    58	#include <ctype.h>
    59	
    60	/* === Internal API ======================================================= */
    61	
    62	/* --- Internal Unicode Format -------------------------------------------- */
    63	
    64	/* Python 3.x requires unicode */
    65	#define Py_USING_UNICODE
    66	
    67	#ifndef SIZEOF_WCHAR_T
    68	#error Must define SIZEOF_WCHAR_T
    69	#endif
    70	
    71	#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
    72	
    73	/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
    74	   Otherwise, Unicode strings are stored as UCS-2 (with limited support
    75	   for UTF-16) */
    76	
    77	#if Py_UNICODE_SIZE >= 4
    78	#define Py_UNICODE_WIDE
    79	#endif
    80	
    81	/* Set these flags if the platform has "wchar.h" and the
    82	   wchar_t type is a 16-bit unsigned type */
    83	/* #define HAVE_WCHAR_H */
    84	/* #define HAVE_USABLE_WCHAR_T */
    85	
    86	/* If the compiler provides a wchar_t type we try to support it
    87	   through the interface functions PyUnicode_FromWideChar(),
    88	   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
    89	
    90	#ifdef HAVE_USABLE_WCHAR_T
    91	# ifndef HAVE_WCHAR_H
    92	#  define HAVE_WCHAR_H
    93	# endif
    94	#endif
    95	
    96	#ifdef HAVE_WCHAR_H
    97	#  include <wchar.h>
    98	#endif
    99	
   100	/* Py_UCS4 and Py_UCS2 are typedefs for the respective
   101	   unicode representations. */
   102	typedef uint32_t Py_UCS4;
   103	typedef uint16_t Py_UCS2;
   104	typedef uint8_t Py_UCS1;
   105	
   106	#ifdef __cplusplus
   107	extern "C" {
   108	#endif
   109	
   110	
   111	PyAPI_DATA(PyTypeObject) PyUnicode_Type;
   112	PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
   113	
   114	#define PyUnicode_Check(op) \
   115	    PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
   116	#define PyUnicode_CheckExact(op) Py_IS_TYPE((op), &PyUnicode_Type)
   117	
   118	/* --- Constants ---------------------------------------------------------- */
   119	
   120	/* This Unicode character will be used as replacement character during
   121	   decoding if the errors argument is set to "replace". Note: the
   122	   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   123	   Unicode 3.0. */
   124	
   125	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
   126	
   127	/* === Public API ========================================================= */
   128	
   129	/* Create a Unicode object from size bytes of the UTF-8 encoded buffer u */
   130	PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
   131	    const char *u,             /* UTF-8 encoded string */
   132	    Py_ssize_t size            /* size of buffer */
   133	    );
   134	
   135	/* Create a Unicode object from the null-terminated UTF-8 encoded
   136	   bytes u.  The size is determined with strlen(). */
   137	PyAPI_FUNC(PyObject*) PyUnicode_FromString(
   138	    const char *u              /* UTF-8 encoded string */
   139	    );
   140	
   141	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   142	PyAPI_FUNC(PyObject*) PyUnicode_Substring(
   143	    PyObject *str,
   144	    Py_ssize_t start,
   145	    Py_ssize_t end);
   146	#endif
   147	
   148	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   149	/* Copy the string into a UCS4 buffer including the null character if copy_null
   150	   is set. Return NULL and raise an exception on error. Raise a SystemError if
   151	   the buffer is smaller than the string. Return buffer on success.
   152	
   153	   buflen is the length of the buffer in (Py_UCS4) characters. */
   154	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
   155	    PyObject *unicode,
   156	    Py_UCS4* buffer,
   157	    Py_ssize_t buflen,
   158	    int copy_null);
   159	
   160	/* Copy the string into a UCS4 buffer. A new buffer is allocated using
   161	   PyMem_Malloc; if this fails, NULL is returned with a MemoryError
   162	   exception set. */
   163	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
   164	#endif
   165	
   166	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   167	/* Get the length of the Unicode object. */
   168	
   169	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
   170	    PyObject *unicode
   171	);
   172	#endif
   173	
   174	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   175	/* Read a character from the string. */
   176	
   177	PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
   178	    PyObject *unicode,
   179	    Py_ssize_t index
   180	    );
   181	
   182	/* Write a character to the string. The string must have been created through
   183	   PyUnicode_New, must not be shared, and must not have been hashed yet.
   184	
   185	   Return 0 on success, -1 on error. */
   186	
   187	PyAPI_FUNC(int) PyUnicode_WriteChar(
   188	    PyObject *unicode,
   189	    Py_ssize_t index,
   190	    Py_UCS4 character
   191	    );
   192	#endif
   193	
   194	/* Resize a Unicode object. The length is the number of codepoints.
   195	
   196	   *unicode is modified to point to the new (resized) object and 0
   197	   returned on success.
   198	
   199	   Try to resize the string in place (which is usually faster than allocating
   200	   a new string and copying characters), or create a new string.
   201	
   202	   Error handling is implemented as follows: an exception is set, -1
   203	   is returned and *unicode left untouched.
   204	
   205	   WARNING: The function doesn't check string content, the result may not be a
   206	            string in canonical representation. */
   207	
   208	PyAPI_FUNC(int) PyUnicode_Resize(
   209	    PyObject **unicode,         /* Pointer to the Unicode object */
   210	    Py_ssize_t length           /* New length */
   211	    );
   212	
   213	/* Decode obj to a Unicode object.
   214	
   215	   bytes, bytearray and other bytes-like objects are decoded according to the
   216	   given encoding and error handler. The encoding and error handler can be
   217	   NULL to have the interface use UTF-8 and "strict".
   218	
   219	   All other objects (including Unicode objects) raise an exception.
   220	
   221	   The API returns NULL in case of an error. The caller is responsible
   222	   for decref'ing the returned objects.
   223	
   224	*/
   225	
   226	PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
   227	    PyObject *obj,              /* Object */
   228	    const char *encoding,       /* encoding */
   229	    const char *errors          /* error handling */
   230	    );
   231	
   232	/* Copy an instance of a Unicode subtype to a new true Unicode object if
   233	   necessary. If obj is already a true Unicode object (not a subtype), return
   234	   the reference with *incremented* refcount.
   235	
   236	   The API returns NULL in case of an error. The caller is responsible
   237	   for decref'ing the returned objects.
   238	
   239	*/
   240	
   241	PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
   242	    PyObject *obj      /* Object */
   243	    );
   244	
   245	PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
   246	    const char *format,   /* ASCII-encoded string  */
   247	    va_list vargs
   248	    );
   249	PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
   250	    const char *format,   /* ASCII-encoded string  */
   251	    ...
   252	    );
   253	
   254	PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
   255	PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
   256	    const char *u              /* UTF-8 encoded string */
   257	    );
   258	
   259	/* --- wchar_t support for platforms which support it --------------------- */
   260	
   261	#ifdef HAVE_WCHAR_H
   262	
   263	/* Create a Unicode Object from the wchar_t buffer w of the given
   264	   size.
   265	
   266	   The buffer is copied into the new object. */
   267	
   268	PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
   269	    const wchar_t *w,           /* wchar_t buffer */
   270	    Py_ssize_t size             /* size of buffer */
   271	    );
   272	
   273	/* Copies the Unicode Object contents into the wchar_t buffer w.  At
   274	   most size wchar_t characters are copied.
   275	
   276	   Note that the resulting wchar_t string may or may not be
   277	   0-terminated.  It is the responsibility of the caller to make sure
   278	   that the wchar_t string is 0-terminated in case this is required by
   279	   the application.
   280	
   281	   Returns the number of wchar_t characters copied (excluding a
   282	   possibly trailing 0-termination character) or -1 in case of an
   283	   error. */
   284	
   285	PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
   286	    PyObject *unicode,          /* Unicode object */
   287	    wchar_t *w,                 /* wchar_t buffer */
   288	    Py_ssize_t size             /* size of buffer */
   289	    );
   290	
   291	/* Convert the Unicode object to a wide character string. The output string
   292	   always ends with a nul character. If size is not NULL, write the number of
   293	   wide characters (excluding the null character) into *size.
   294	
   295	   Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
   296	   on success. On error, returns NULL, *size is undefined and raises a
   297	   MemoryError. */
   298	
   299	PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
   300	    PyObject *unicode,          /* Unicode object */
   301	    Py_ssize_t *size            /* number of characters of the result */
   302	    );
   303	
   304	#endif
   305	
   306	/* --- Unicode ordinals --------------------------------------------------- */
   307	
   308	/* Create a Unicode Object from the given Unicode code point ordinal.
   309	
   310	   The ordinal must be in range(0x110000). A ValueError is
   311	   raised in case it is not.
   312	
   313	*/
   314	
   315	PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
   316	
   317	/* === Builtin Codecs =====================================================
   318	
   319	   Many of these APIs take two arguments encoding and errors. These
   320	   parameters encoding and errors have the same semantics as the ones
   321	   of the builtin str() API.
   322	
   323	   Setting encoding to NULL causes the default encoding (UTF-8) to be used.
   324	
   325	   Error handling is set by errors which may also be set to NULL
   326	   meaning to use the default handling defined for the codec. Default
   327	   error handling for all builtin codecs is "strict" (ValueErrors are
   328	   raised).
   329	
   330	   The codecs all use a similar interface. Only deviations from the
   331	   generic ones are documented.
   332	
   333	*/
   334	
   335	/* --- Manage the default encoding ---------------------------------------- */
   336	
   337	/* Returns "utf-8".  */
   338	PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
   339	
   340	/* --- Generic Codecs ----------------------------------------------------- */
   341	
   342	/* Create a Unicode object by decoding the encoded string s of the
   343	   given size. */
   344	
   345	PyAPI_FUNC(PyObject*) PyUnicode_Decode(
   346	    const char *s,              /* encoded string */
   347	    Py_ssize_t size,            /* size of buffer */
   348	    const char *encoding,       /* encoding */
   349	    const char *errors          /* error handling */
   350	    );
   351	
   352	/* Decode a Unicode object unicode and return the result as Python
   353	   object.
   354	
   355	   This API is DEPRECATED. The only supported standard encoding is rot13.
   356	   Use PyCodec_Decode() to decode with rot13 and non-standard codecs
   357	   that decode from str. */
   358	
   359	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
   360	    PyObject *unicode,          /* Unicode object */
   361	    const char *encoding,       /* encoding */
   362	    const char *errors          /* error handling */
   363	    );
   364	
   365	/* Decode a Unicode object unicode and return the result as Unicode
   366	   object.
   367	
   368	   This API is DEPRECATED. The only supported standard encoding is rot13.
   369	   Use PyCodec_Decode() to decode with rot13 and non-standard codecs
   370	   that decode from str to str. */
   371	
   372	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
   373	    PyObject *unicode,          /* Unicode object */
   374	    const char *encoding,       /* encoding */
   375	    const char *errors          /* error handling */
   376	    );
   377	
   378	/* Encodes a Unicode object and returns the result as Python
   379	   object.
   380	
   381	   This API is DEPRECATED.  It is superseded by PyUnicode_AsEncodedString()
   382	   since all standard encodings (except rot13) encode str to bytes.
   383	   Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
   384	   that encode from str to non-bytes. */
   385	
   386	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
   387	    PyObject *unicode,          /* Unicode object */
   388	    const char *encoding,       /* encoding */
   389	    const char *errors          /* error handling */
   390	    );
   391	
   392	/* Encodes a Unicode object and returns the result as Python bytes
   393	   object. */
   394	
   395	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
   396	    PyObject *unicode,          /* Unicode object */
   397	    const char *encoding,       /* encoding */
   398	    const char *errors          /* error handling */
   399	    );
   400	
   401	/* Encodes a Unicode object and returns the result as Unicode
   402	   object.
   403	
   404	   This API is DEPRECATED.  The only supported standard encoding is rot13.
   405	   Use PyCodec_Encode() to encode with rot13 and non-standard codecs
   406	   that encode from str to str. */
   407	
   408	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
   409	    PyObject *unicode,          /* Unicode object */
   410	    const char *encoding,       /* encoding */
   411	    const char *errors          /* error handling */
   412	    );
   413	
   414	/* Build an encoding map. */
   415	
   416	PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
   417	    PyObject* string            /* 256 character map */
   418	   );
   419	
   420	/* --- UTF-7 Codecs ------------------------------------------------------- */
   421	
   422	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
   423	    const char *string,         /* UTF-7 encoded string */
   424	    Py_ssize_t length,          /* size of string */
   425	    const char *errors          /* error handling */
   426	    );
   427	
   428	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
   429	    const char *string,         /* UTF-7 encoded string */
   430	    Py_ssize_t length,          /* size of string */
   431	    const char *errors,         /* error handling */
   432	    Py_ssize_t *consumed        /* bytes consumed */
   433	    );
   434	
   435	/* --- UTF-8 Codecs ------------------------------------------------------- */
   436	
   437	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
   438	    const char *string,         /* UTF-8 encoded string */
   439	    Py_ssize_t length,          /* size of string */
   440	    const char *errors          /* error handling */
   441	    );
   442	
   443	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
   444	    const char *string,         /* UTF-8 encoded string */
   445	    Py_ssize_t length,          /* size of string */
   446	    const char *errors,         /* error handling */
   447	    Py_ssize_t *consumed        /* bytes consumed */
   448	    );
   449	
   450	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
   451	    PyObject *unicode           /* Unicode object */
   452	    );
   453	
   454	/* Returns a pointer to the default encoding (UTF-8) of the
   455	   Unicode object unicode and the size of the encoded representation
   456	   in bytes stored in *size.
   457	
   458	   In case of an error, no *size is set.
   459	
   460	   This function caches the UTF-8 encoded string in the unicodeobject
   461	   and subsequent calls will return the same string.  The memory is released
   462	   when the unicodeobject is deallocated.
   463	*/
   464	
   465	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000
   466	PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
   467	    PyObject *unicode,
   468	    Py_ssize_t *size);
   469	#endif
   470	
   471	/* --- UTF-32 Codecs ------------------------------------------------------ */
   472	
   473	/* Decodes length bytes from a UTF-32 encoded buffer string and returns
   474	   the corresponding Unicode object.
   475	
   476	   errors (if non-NULL) defines the error handling. It defaults
   477	   to "strict".
   478	
   479	   If byteorder is non-NULL, the decoder starts decoding using the
   480	   given byte order:
   481	
   482	    *byteorder == -1: little endian
   483	    *byteorder == 0:  native order
   484	    *byteorder == 1:  big endian
   485	
   486	   In native mode, the first four bytes of the stream are checked for a
   487	   BOM mark. If found, the BOM mark is analysed, the byte order
   488	   adjusted and the BOM skipped.  In the other modes, no BOM mark
   489	   interpretation is done. After completion, *byteorder is set to the
   490	   current byte order at the end of input data.
   491	
   492	   If byteorder is NULL, the codec starts in native order mode.
   493	
   494	*/
   495	
   496	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
   497	    const char *string,         /* UTF-32 encoded string */
   498	    Py_ssize_t length,          /* size of string */
   499	    const char *errors,         /* error handling */
   500	    int *byteorder              /* pointer to byteorder to use
   501	                                   0=native;-1=LE,1=BE; updated on
   502	                                   exit */
   503	    );
   504	
   505	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
   506	    const char *string,         /* UTF-32 encoded string */
   507	    Py_ssize_t length,          /* size of string */
   508	    const char *errors,         /* error handling */
   509	    int *byteorder,             /* pointer to byteorder to use
   510	                                   0=native;-1=LE,1=BE; updated on
   511	                                   exit */
   512	    Py_ssize_t *consumed        /* bytes consumed */
   513	    );
   514	
   515	/* Returns a Python bytes object using the UTF-32 encoding in native byte
   516	   order. The result always starts with a BOM mark.  */
   517	
   518	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
   519	    PyObject *unicode           /* Unicode object */
   520	    );
   521	
   522	/* Returns a Python string object holding the UTF-32 encoded value of
   523	   the Unicode data.
   524	
   525	   If byteorder is not 0, output is written according to the following
   526	   byte order:
   527	
   528	   byteorder == -1: little endian
   529	   byteorder == 0:  native byte order (writes a BOM mark)
   530	   byteorder == 1:  big endian
   531	
   532	   If byteorder is 0, the output string will always start with the
   533	   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   534	   prepended.
   535	
   536	*/
   537	
   538	/* --- UTF-16 Codecs ------------------------------------------------------ */
   539	
   540	/* Decodes length bytes from a UTF-16 encoded buffer string and returns
   541	   the corresponding Unicode object.
   542	
   543	   errors (if non-NULL) defines the error handling. It defaults
   544	   to "strict".
   545	
   546	   If byteorder is non-NULL, the decoder starts decoding using the
   547	   given byte order:
   548	
   549	    *byteorder == -1: little endian
   550	    *byteorder == 0:  native order
   551	    *byteorder == 1:  big endian
   552	
   553	   In native mode, the first two bytes of the stream are checked for a
   554	   BOM mark. If found, the BOM mark is analysed, the byte order
   555	   adjusted and the BOM skipped.  In the other modes, no BOM mark
   556	   interpretation is done. After completion, *byteorder is set to the
   557	   current byte order at the end of input data.
   558	
   559	   If byteorder is NULL, the codec starts in native order mode.
   560	
   561	*/
   562	
   563	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
   564	    const char *string,         /* UTF-16 encoded string */
   565	    Py_ssize_t length,          /* size of string */
   566	    const char *errors,         /* error handling */
   567	    int *byteorder              /* pointer to byteorder to use
   568	                                   0=native;-1=LE,1=BE; updated on
   569	                                   exit */
   570	    );
   571	
   572	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
   573	    const char *string,         /* UTF-16 encoded string */
   574	    Py_ssize_t length,          /* size of string */
   575	    const char *errors,         /* error handling */
   576	    int *byteorder,             /* pointer to byteorder to use
   577	                                   0=native;-1=LE,1=BE; updated on
   578	                                   exit */
   579	    Py_ssize_t *consumed        /* bytes consumed */
   580	    );
   581	
   582	/* Returns a Python bytes object using the UTF-16 encoding in native byte
   583	   order. The result always starts with a BOM mark.  */
   584	
   585	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
   586	    PyObject *unicode           /* Unicode object */
   587	    );
   588	
   589	/* --- Unicode-Escape Codecs ---------------------------------------------- */
   590	
   591	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
   592	    const char *string,         /* Unicode-Escape encoded string */
   593	    Py_ssize_t length,          /* size of string */
   594	    const char *errors          /* error handling */
   595	    );
   596	
   597	PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
   598	    PyObject *unicode           /* Unicode object */
   599	    );
   600	
   601	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
   602	
   603	PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
   604	    const char *string,         /* Raw-Unicode-Escape encoded string */
   605	    Py_ssize_t length,          /* size of string */
   606	    const char *errors          /* error handling */
   607	    );
   608	
   609	PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
   610	    PyObject *unicode           /* Unicode object */
   611	    );
   612	
   613	/* --- Latin-1 Codecs -----------------------------------------------------
   614	
   615	   Note: Latin-1 corresponds to the first 256 Unicode ordinals. */
   616	
   617	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
   618	    const char *string,         /* Latin-1 encoded string */
   619	    Py_ssize_t length,          /* size of string */
   620	    const char *errors          /* error handling */
   621	    );
   622	
   623	PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
   624	    PyObject *unicode           /* Unicode object */
   625	    );
   626	
   627	/* --- ASCII Codecs -------------------------------------------------------
   628	
   629	   Only 7-bit ASCII data is expected. All other codes generate errors.
   630	
   631	*/
   632	
   633	PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
   634	    const char *string,         /* ASCII encoded string */
   635	    Py_ssize_t length,          /* size of string */
   636	    const char *errors          /* error handling */
   637	    );
   638	
   639	PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
   640	    PyObject *unicode           /* Unicode object */
   641	    );
   642	
   643	/* --- Character Map Codecs -----------------------------------------------
   644	
   645	   This codec uses mappings to encode and decode characters.
   646	
   647	   Decoding mappings must map byte ordinals (integers in the range from 0 to
   648	   255) to Unicode strings, integers (which are then interpreted as Unicode
   649	   ordinals) or None.  Unmapped data bytes (ones which cause a LookupError)
   650	   as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined
   651	   mapping" and cause an error.
   652	
   653	   Encoding mappings must map Unicode ordinal integers to bytes objects,
   654	   integers in the range from 0 to 255 or None.  Unmapped character
   655	   ordinals (ones which cause a LookupError) as well as mapped to
   656	   None are treated as "undefined mapping" and cause an error.
   657	
   658	*/
   659	
   660	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
   661	    const char *string,         /* Encoded string */
   662	    Py_ssize_t length,          /* size of string */
   663	    PyObject *mapping,          /* decoding mapping */
   664	    const char *errors          /* error handling */
   665	    );
   666	
   667	PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
   668	    PyObject *unicode,          /* Unicode object */
   669	    PyObject *mapping           /* encoding mapping */
   670	    );
   671	
   672	/* --- MBCS codecs for Windows -------------------------------------------- */
   673	
   674	#ifdef MS_WINDOWS
   675	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
   676	    const char *string,         /* MBCS encoded string */
   677	    Py_ssize_t length,          /* size of string */
   678	    const char *errors          /* error handling */
   679	    );
   680	
   681	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
   682	    const char *string,         /* MBCS encoded string */
   683	    Py_ssize_t length,          /* size of string */
   684	    const char *errors,         /* error handling */
   685	    Py_ssize_t *consumed        /* bytes consumed */
   686	    );
   687	
   688	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   689	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
   690	    int code_page,              /* code page number */
   691	    const char *string,         /* encoded string */
   692	    Py_ssize_t length,          /* size of string */
   693	    const char *errors,         /* error handling */
   694	    Py_ssize_t *consumed        /* bytes consumed */
   695	    );
   696	#endif
   697	
   698	PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
   699	    PyObject *unicode           /* Unicode object */
   700	    );
   701	
   702	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   703	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
   704	    int code_page,              /* code page number */
   705	    PyObject *unicode,          /* Unicode object */
   706	    const char *errors          /* error handling */
   707	    );
   708	#endif
   709	
   710	#endif /* MS_WINDOWS */
   711	
   712	/* --- Locale encoding --------------------------------------------------- */
   713	
   714	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   715	/* Decode a string from the current locale encoding. The supported error
   716	   handlers are "strict" and "surrogateescape" (PEP 383); the decoder is
   717	   strict if *errors* is NULL. With "surrogateescape", undecodable bytes are
   718	   escaped, and a byte sequence that can be decoded as a surrogate character
   719	   is escaped using the 'surrogateescape' error handler instead of being
   720	   decoded. *str* must end with a null character but cannot contain embedded
   721	   null characters. */
   722	
   723	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
   724	    const char *str,
   725	    Py_ssize_t len,
   726	    const char *errors);
   727	
   728	/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
   729	   length using strlen(). */
   730	
   731	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
   732	    const char *str,
   733	    const char *errors);
   734	
   735	/* Encode a Unicode object to the current locale encoding. The supported
   736	   error handlers are "strict" and "surrogateescape" (PEP 383); the encoder
   737	   is strict if *errors* is NULL. Return a bytes object. The string cannot
   738	   contain embedded null characters. */
   739	
   740	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
   741	    PyObject *unicode,
   742	    const char *errors
   743	    );
   744	#endif
   745	
   746	/* --- File system encoding ---------------------------------------------- */
   747	
   748	/* ParseTuple converter: encode str objects to bytes using
   749	   PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
   750	
   751	PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
   752	
   753	/* ParseTuple converter: decode bytes objects to unicode using
   754	   PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
   755	
   756	PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
   757	
   758	/* Decode a null-terminated string from the Python filesystem encoding
   759	   and error handler.
   760	
   761	   If the string length is known, use PyUnicode_DecodeFSDefaultAndSize(). */
   762	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
   763	    const char *s               /* encoded string */
   764	    );
   765	
   766	/* Decode a string from the Python filesystem encoding and error handler. */
   767	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
   768	    const char *s,               /* encoded string */
   769	    Py_ssize_t size              /* size */
   770	    );
   771	
   772	/* Encode a Unicode object to the Python filesystem encoding and error handler.
   773	   Return bytes. */
   774	PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
   775	    PyObject *unicode
   776	    );
   777	
   778	/* --- Methods & Slots ----------------------------------------------------
   779	
   780	   These are capable of handling Unicode objects and strings on input
   781	   (we refer to them as strings in the descriptions) and return
   782	   Unicode objects or integers as appropriate. */
   783	
   784	/* Concat two strings giving a new Unicode string. */
   785	
   786	PyAPI_FUNC(PyObject*) PyUnicode_Concat(
   787	    PyObject *left,             /* Left string */
   788	    PyObject *right             /* Right string */
   789	    );
   790	
   791	/* Concat two strings and put the result in *pleft
   792	   (sets *pleft to NULL on error) */
   793	
   794	PyAPI_FUNC(void) PyUnicode_Append(
   795	    PyObject **pleft,           /* Pointer to left string */
   796	    PyObject *right             /* Right string */
   797	    );
   798	
   799	/* Concat two strings, put the result in *pleft and drop the right object
   800	   (sets *pleft to NULL on error) */
   801	
   802	PyAPI_FUNC(void) PyUnicode_AppendAndDel(
   803	    PyObject **pleft,           /* Pointer to left string */
   804	    PyObject *right             /* Right string */
   805	    );
   806	
   807	/* Split a string giving a list of Unicode strings.
   808	
   809	   If sep is NULL, splitting will be done at all whitespace
   810	   substrings. Otherwise, splits occur at the given separator.
   811	
   812	   At most maxsplit splits will be done. If negative, no limit is set.
   813	
   814	   Separators are not included in the resulting list.
   815	
   816	*/
   817	
   818	PyAPI_FUNC(PyObject*) PyUnicode_Split(
   819	    PyObject *s,                /* String to split */
   820	    PyObject *sep,              /* String separator */
   821	    Py_ssize_t maxsplit         /* Maxsplit count */
   822	    );
   823	
   824	/* Ditto, but split at line breaks.
   825	
   826	   CRLF is considered to be one line break. Line breaks are not
   827	   included in the resulting list unless keepends is true. */
   828	
   829	PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
   830	    PyObject *s,                /* String to split */
   831	    int keepends                /* If true, line end markers are included */
   832	    );
   833	
   834	/* Partition a string using a given separator. */
   835	
   836	PyAPI_FUNC(PyObject*) PyUnicode_Partition(
   837	    PyObject *s,                /* String to partition */
   838	    PyObject *sep               /* String separator */
   839	    );
   840	
   841	/* Partition a string using a given separator, searching from the end of the
   842	   string. */
   843	
   844	PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
   845	    PyObject *s,                /* String to partition */
   846	    PyObject *sep               /* String separator */
   847	    );
   848	
   849	/* Split a string giving a list of Unicode strings.
   850	
   851	   If sep is NULL, splitting will be done at all whitespace
   852	   substrings. Otherwise, splits occur at the given separator.
   853	
   854	   At most maxsplit splits will be done. If negative, no limit is set.
   855	   Unlike PyUnicode_Split, PyUnicode_RSplit splits from the end of the
   856	   string.
   857	
   858	   Separators are not included in the resulting list.
   859	
   860	*/
   861	
   862	PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
   863	    PyObject *s,                /* String to split */
   864	    PyObject *sep,              /* String separator */
   865	    Py_ssize_t maxsplit         /* Maxsplit count */
   866	    );
   867	
   868	/* Translate a string by applying a character mapping table to it and
   869	   return the resulting Unicode object.
   870	
   871	   The mapping table must map Unicode ordinal integers to Unicode strings,
   872	   Unicode ordinal integers or None (causing deletion of the character).
   873	
   874	   Mapping tables may be dictionaries or sequences. Unmapped character
   875	   ordinals (ones which cause a LookupError) are left untouched and
   876	   are copied as-is.
   877	
   878	*/
   879	
   880	PyAPI_FUNC(PyObject *) PyUnicode_Translate(
   881	    PyObject *str,              /* String */
   882	    PyObject *table,            /* Translate table */
   883	    const char *errors          /* error handling */
   884	    );
   885	
   886	/* Join a sequence of strings using the given separator and return
   887	   the resulting Unicode string. */
   888	
   889	PyAPI_FUNC(PyObject*) PyUnicode_Join(
   890	    PyObject *separator,        /* Separator string */
   891	    PyObject *seq               /* Sequence object */
   892	    );
   893	
   894	/* Return 1 if substr matches str[start:end] at the given tail end, 0
   895	   otherwise. Return -1 if an error occurred. */
   896	
   897	PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
   898	    PyObject *str,              /* String */
   899	    PyObject *substr,           /* Prefix or Suffix string */
   900	    Py_ssize_t start,           /* Start index */
   901	    Py_ssize_t end,             /* Stop index */
   902	    int direction               /* Tail end: -1 prefix, +1 suffix */
   903	    );
   904	
   905	/* Return the first position of substr in str[start:end] using the
   906	   given search direction or -1 if not found. -2 is returned in case
   907	   an error occurred and an exception is set. */
   908	
   909	PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
   910	    PyObject *str,              /* String */
   911	    PyObject *substr,           /* Substring to find */
   912	    Py_ssize_t start,           /* Start index */
   913	    Py_ssize_t end,             /* Stop index */
   914	    int direction               /* Find direction: +1 forward, -1 backward */
   915	    );
   916	
   917	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   918	/* Like PyUnicode_Find, but search for single character only. */
   919	PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
   920	    PyObject *str,
   921	    Py_UCS4 ch,
   922	    Py_ssize_t start,
   923	    Py_ssize_t end,
   924	    int direction
   925	    );
   926	#endif
   927	
   928	/* Count non-overlapping occurrences of substr in str[start:end]; -1 on error. */
   929	
   930	PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
   931	    PyObject *str,              /* String */
   932	    PyObject *substr,           /* Substring to count */
   933	    Py_ssize_t start,           /* Start index */
   934	    Py_ssize_t end              /* Stop index */
   935	    );
   936	
   937	/* Replace at most maxcount occurrences of substr in str with replstr
   938	   and return the resulting Unicode object. */
   939	
   940	PyAPI_FUNC(PyObject *) PyUnicode_Replace(
   941	    PyObject *str,              /* String */
   942	    PyObject *substr,           /* Substring to find */
   943	    PyObject *replstr,          /* Substring to replace */
   944	    Py_ssize_t maxcount         /* Max. number of replacements to apply;
   945	                                   -1 = all */
   946	    );
   947	
   948	/* Compare two strings and return -1, 0, 1 for less than, equal,
   949	   greater than resp.
   950	   Raise an exception and return -1 on error. */
   951	
   952	PyAPI_FUNC(int) PyUnicode_Compare(
   953	    PyObject *left,             /* Left string */
   954	    PyObject *right             /* Right string */
   955	    );
   956	
   957	/* Compare a Unicode object with C string and return -1, 0, 1 for less than,
   958	   equal, and greater than, respectively.  It is best to pass only
   959	   ASCII-encoded strings, but the function interprets the input string as
   960	   ISO-8859-1 if it contains non-ASCII characters.
   961	   This function does not raise exceptions. */
   962	
   963	PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
   964	    PyObject *left,
   965	    const char *right           /* ASCII-encoded string */
   966	    );
   967	
   968	/* Rich compare two strings and return one of the following:
   969	
   970	   - NULL in case an exception was raised
   971	   - Py_True or Py_False for successful comparisons
   972	   - Py_NotImplemented in case the type combination is unknown
   973	
   974	   Possible values for op:
   975	
   976	     Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
   977	
   978	*/
   979	
   980	PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
   981	    PyObject *left,             /* Left string */
   982	    PyObject *right,            /* Right string */
   983	    int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
   984	    );
   985	
   986	/* Apply an argument tuple or dictionary to a format string and return
   987	   the resulting Unicode string. */
   988	
   989	PyAPI_FUNC(PyObject *) PyUnicode_Format(
   990	    PyObject *format,           /* Format string */
   991	    PyObject *args              /* Argument tuple or dictionary */
   992	    );
   993	
   994	/* Check whether element is contained in container and return 1/0
   995	   accordingly.
   996	
   997	   element has to coerce to a one element Unicode string. -1 is
   998	   returned in case of an error. */
   999	
  1000	PyAPI_FUNC(int) PyUnicode_Contains(
  1001	    PyObject *container,        /* Container string */
  1002	    PyObject *element           /* Element string */
  1003	    );
  1004	
  1005	/* Checks whether argument is a valid identifier. */
  1006	
  1007	PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
  1008	
  1009	/* === Characters Type APIs =============================================== */
  1010	
  1011	#ifndef Py_LIMITED_API
  1012	#  define Py_CPYTHON_UNICODEOBJECT_H
  1013	#  include "cpython/unicodeobject.h"
  1014	#  undef Py_CPYTHON_UNICODEOBJECT_H
  1015	#endif
  1016	
  1017	#ifdef __cplusplus
  1018	}
  1019	#endif
  1020	#endif /* !Py_UNICODEOBJECT_H */