Where Online Learning is simpler!

The C and C++ Include Header Files

/usr/include/unicode/ushape.h


$ cat -n /usr/include/unicode/ushape.h

     1	// © 2016 and later: Unicode, Inc. and others.
     2	// License & terms of use: http://www.unicode.org/copyright.html
     3	/*
     4	******************************************************************************
     5	*
     6	*   Copyright (C) 2000-2012, International Business Machines
     7	*   Corporation and others.  All Rights Reserved.
     8	*
     9	******************************************************************************
    10	*   file name:  ushape.h
    11	*   encoding:   UTF-8
    12	*   tab size:   8 (not used)
    13	*   indentation:4
    14	*
    15	*   created on: 2000jun29
    16	*   created by: Markus W. Scherer
    17	*/
    18	
    19	#ifndef __USHAPE_H__
    20	#define __USHAPE_H__
    21	
    22	#include "unicode/utypes.h"
    23	
    24	/**
    25	 * \file
    26	 * \brief C API:  Arabic shaping
    27	 * 
    28	 */
    29	
    30	/**
    31	 * Shape Arabic text on a character basis.
    32	 *
    33	 * <p>This function performs basic operations for "shaping" Arabic text. It is most
    34	 * useful for use with legacy data formats and legacy display technology
    35	 * (simple terminals). All operations are performed on Unicode characters.</p>
    36	 *
    37	 * <p>Text-based shaping means that some character code points in the text are
    38	 * replaced by others depending on the context. It transforms one kind of text
    39	 * into another. In comparison, modern displays for Arabic text select
    40	 * appropriate, context-dependent font glyphs for each text element, which means
    41	 * that they transform text into a glyph vector.</p>
    42	 *
    43	 * <p>Text transformations are necessary when modern display technology is not
    44	 * available or when text needs to be transformed to or from legacy formats that
    45	 * use "shaped" characters. Since the Arabic script is cursive, connecting
    46	 * adjacent letters to each other, computers select images for each letter based
    47	 * on the surrounding letters. This usually results in four images per Arabic
    48	 * letter: initial, middle, final, and isolated forms. In Unicode, on the other
    49	 * hand, letters are normally stored abstract, and a display system is expected
    50	 * to select the necessary glyphs. (This makes searching and other text
    51	 * processing easier because the same letter has only one code.) It is possible
    52	 * to mimic this with text transformations because there are characters in
    53	 * Unicode that are rendered as letters with a specific shape
    54	 * (or cursive connectivity). They were included for interoperability with
    55	 * legacy systems and codepages, and for unsophisticated display systems.</p>
    56	 *
    57	 * <p>A second kind of text transformation is supported for Arabic digits:
    58	 * For compatibility with legacy codepages that only include European digits,
    59	 * it is possible to replace one set of digits by another, changing the
    60	 * character code points. These operations can be performed for either
    61	 * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
    62	 * digits (U+06f0...U+06f9).</p>
    63	 *
    64	 * <p>Some replacements may result in more or fewer characters (code points).
    65	 * By default, this means that the destination buffer may receive text with a
    66	 * length different from the source length. Some legacy systems rely on the
    67	 * length of the text to be constant. They expect extra spaces to be added
    68	 * or consumed either next to the affected character or at the end of the
    69	 * text.</p>
    70	 *
    71	 * <p>For details about the available operations, see the description of the
    72	 * <code>U_SHAPE_...</code> options.</p>
    73	 *
    74	 * @param source The input text.
    75	 *
    76	 * @param sourceLength The number of UChars in <code>source</code>.
    77	 *
    78	 * @param dest The destination buffer that will receive the results of the
    79	 *             requested operations. It may be <code>NULL</code> only if
    80	 *             <code>destSize</code> is 0. The source and destination must not
    81	 *             overlap.
    82	 *
    83	 * @param destSize The size (capacity) of the destination buffer in UChars.
    84	 *                 If <code>destSize</code> is 0, then no output is produced,
    85	 *                 but the necessary buffer size is returned ("preflighting").
    86	 *
    87	 * @param options This is a 32-bit set of flags that specify the operations
    88	 *                that are performed on the input text. If no error occurs,
    89	 *                then the result will always be written to the destination
    90	 *                buffer.
    91	 *
    92	 * @param pErrorCode must be a valid pointer to an error code value,
    93	 *        which must not indicate a failure before the function call.
    94	 *
    95	 * @return The number of UChars written to the destination buffer.
    96	 *         If an error occurred, then no output was written, or it may be
    97	 *         incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
    98	 *         the return value indicates the necessary destination buffer size.
    99	 * @stable ICU 2.0
   100	 */
   101	U_CAPI int32_t U_EXPORT2
   102	u_shapeArabic(const UChar *source, int32_t sourceLength,
   103	              UChar *dest, int32_t destSize,
   104	              uint32_t options,
   105	              UErrorCode *pErrorCode);
   106	
   107	/**
   108	 * Memory option: allow the result to have a different length than the source.
   109	 * Affects: LamAlef options
   110	 * @stable ICU 2.0
   111	 */
   112	#define U_SHAPE_LENGTH_GROW_SHRINK              0
   113	
   114	/**
   115	 * Memory option: allow the result to have a different length than the source.
   116	 * Affects: LamAlef options
   117	 * This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK
   118	 * @stable ICU 4.2
   119	 */
   120	#define U_SHAPE_LAMALEF_RESIZE                  0 
   121	
   122	/**
   123	 * Memory option: the result must have the same length as the source.
   124	 * If more room is necessary, then try to consume spaces next to modified characters.
   125	 * @stable ICU 2.0
   126	 */
   127	#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR        1
   128	
   129	/**
   130	 * Memory option: the result must have the same length as the source.
   131	 * If more room is necessary, then try to consume spaces next to modified characters.
   132	 * Affects: LamAlef options
   133	 * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR
   134	 * @stable ICU 4.2
   135	 */
   136	#define U_SHAPE_LAMALEF_NEAR                    1 
   137	
   138	/**
   139	 * Memory option: the result must have the same length as the source.
   140	 * If more room is necessary, then try to consume spaces at the end of the text.
   141	 * @stable ICU 2.0
   142	 */
   143	#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END      2
   144	
   145	/**
   146	 * Memory option: the result must have the same length as the source.
   147	 * If more room is necessary, then try to consume spaces at the end of the text.
   148	 * Affects: LamAlef options
   149	 * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END
   150	 * @stable ICU 4.2
   151	 */
   152	#define U_SHAPE_LAMALEF_END                     2 
   153	
   154	/**
   155	 * Memory option: the result must have the same length as the source.
   156	 * If more room is necessary, then try to consume spaces at the beginning of the text.
   157	 * @stable ICU 2.0
   158	 */
   159	#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
   160	
   161	/**
   162	 * Memory option: the result must have the same length as the source.
   163	 * If more room is necessary, then try to consume spaces at the beginning of the text.
   164	 * Affects: LamAlef options
   165	 * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING
   166	 * @stable ICU 4.2
   167	 */
   168	#define U_SHAPE_LAMALEF_BEGIN                    3 
   169	
   170	
   171	/**
   172	 * Memory option: the result must have the same length as the source.
   173	 * Shaping Mode: For each LAMALEF character found, expand LAMALEF using a space at the end.
   174	 *               If there is no space at the end, use a space at the beginning of the buffer.
   175	 *               If there is no space at the beginning of the buffer, use the space near
   176	 *               (i.e. directly after) the LAMALEF character.
   177	 *               If no spaces are found, the error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
   178	 *               will be set in pErrorCode.
   179	 *
   180	 * Deshaping Mode: Performs the same function as U_SHAPE_LAMALEF_END.
   181	 * Affects: LamAlef options
   182	 * @stable ICU 4.2
   183	 */
   184	#define U_SHAPE_LAMALEF_AUTO                     0x10000 
   185	
   186	/** Bit mask for memory options. @stable ICU 2.0 */
   187	#define U_SHAPE_LENGTH_MASK                      0x10003 /* Changed old value 3 */
   188	
   189	
   190	/**
   191	 * Bit mask for LamAlef memory options.
   192	 * @stable ICU 4.2
   193	 */
   194	#define U_SHAPE_LAMALEF_MASK                     0x10003 /* updated */
   195	
   196	/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
   197	#define U_SHAPE_TEXT_DIRECTION_LOGICAL          0
   198	
   199	/**
   200	 * Direction indicator:
   201	 * the source is in visual RTL order,
   202	 * the rightmost displayed character stored first.
   203	 * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL
   204	 * @stable ICU 4.2
   205	 */
   206	#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL       0
   207	
   208	/**
   209	 * Direction indicator:
   210	 * the source is in visual LTR order,
   211	 * the leftmost displayed character stored first.
   212	 * @stable ICU 2.0
   213	 */
   214	#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR       4
   215	
   216	/** Bit mask for direction indicators. @stable ICU 2.0 */
   217	#define U_SHAPE_TEXT_DIRECTION_MASK             4
   218	
   219	
   220	/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
   221	#define U_SHAPE_LETTERS_NOOP                    0
   222	
   223	/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
   224	#define U_SHAPE_LETTERS_SHAPE                   8
   225	
   226	/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
   227	#define U_SHAPE_LETTERS_UNSHAPE                 0x10
   228	
   229	/**
   230	 * Letter shaping option: replace abstract letter characters by "shaped" ones.
   231	 * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
   232	 * are always "shaped" into the isolated form instead of the medial form
   233	 * (selecting code points from the Arabic Presentation Forms-B block).
   234	 * @stable ICU 2.0
   235	 */
   236	#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
   237	
   238	
   239	/** Bit mask for letter shaping options. @stable ICU 2.0 */
   240	#define U_SHAPE_LETTERS_MASK                        0x18
   241	
   242	
   243	/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
   244	#define U_SHAPE_DIGITS_NOOP                     0
   245	
   246	/**
   247	 * Digit shaping option:
   248	 * Replace European digits (U+0030...) by Arabic-Indic digits.
   249	 * @stable ICU 2.0
   250	 */
   251	#define U_SHAPE_DIGITS_EN2AN                    0x20
   252	
   253	/**
   254	 * Digit shaping option:
   255	 * Replace Arabic-Indic digits by European digits (U+0030...).
   256	 * @stable ICU 2.0
   257	 */
   258	#define U_SHAPE_DIGITS_AN2EN                    0x40
   259	
   260	/**
   261	 * Digit shaping option:
   262	 * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
   263	 * strongly directional character is an Arabic letter
   264	 * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
   265	 * The direction of "preceding" depends on the direction indicator option.
   266	 * For the first characters, the preceding strongly directional character
   267	 * (initial state) is assumed to be not an Arabic letter
   268	 * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
   269	 * @stable ICU 2.0
   270	 */
   271	#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR          0x60
   272	
   273	/**
   274	 * Digit shaping option:
   275	 * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
   276	 * strongly directional character is an Arabic letter
   277	 * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
   278	 * The direction of "preceding" depends on the direction indicator option.
   279	 * For the first characters, the preceding strongly directional character
   280	 * (initial state) is assumed to be an Arabic letter.
   281	 * @stable ICU 2.0
   282	 */
   283	#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL          0x80
   284	
   285	/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
   286	#define U_SHAPE_DIGITS_RESERVED                 0xa0
   287	
   288	/** Bit mask for digit shaping options. @stable ICU 2.0 */
   289	#define U_SHAPE_DIGITS_MASK                     0xe0
   290	
   291	
   292	/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
   293	#define U_SHAPE_DIGIT_TYPE_AN                   0
   294	
   295	/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
   296	#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED          0x100
   297	
   298	/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
   299	#define U_SHAPE_DIGIT_TYPE_RESERVED             0x200
   300	
   301	/** Bit mask for digit type options. @stable ICU 2.0 */
   302	#define U_SHAPE_DIGIT_TYPE_MASK                 0x300 /* I need to change this from 0x3f00 to 0x300 */
   303	
   304	/** 
   305	 * Tashkeel aggregation option:
   306	 * Replaces each combination of U+0651 with one of
   307	 * U+064C, U+064D, U+064E, U+064F, U+0650 by
   308	 * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 respectively.
   309	 * @stable ICU 3.6
   310	 */
   311	#define U_SHAPE_AGGREGATE_TASHKEEL              0x4000
   312	/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */
   313	#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP         0
   314	/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */
   315	#define U_SHAPE_AGGREGATE_TASHKEEL_MASK         0x4000
   316	
   317	/** 
   318	 * Presentation form option:
   319	 * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B
   320	 * characters with U+06xx characters, before shaping.
   321	 * @stable ICU 3.6
   322	 */
   323	#define U_SHAPE_PRESERVE_PRESENTATION           0x8000
   324	/** Presentation form option:
   325	 * Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with
   326	 * their unshaped correspondents in range U+06xx, before shaping.
   327	 * @stable ICU 3.6
   328	 */
   329	#define U_SHAPE_PRESERVE_PRESENTATION_NOOP      0
   330	/** Bit mask for preserve presentation form. @stable ICU 3.6 */
   331	#define U_SHAPE_PRESERVE_PRESENTATION_MASK      0x8000
   332	
   333	/* Seen Tail option */ 
   334	/**
   335	 * Memory option: the result must have the same length as the source.
   336	 * Shaping mode: The SEEN family character will expand into two characters using space near
   337	 *               the SEEN family character (i.e. the space after the character).
   338	 *               If no spaces are found, the error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
   339	 *               will be set in pErrorCode.
   340	 *
   341	 * De-shaping mode: Any Seen character followed by Tail character will be
   342	 *                  replaced by one cell Seen and a space will replace the Tail.
   343	 * Affects: Seen options
   344	 * @stable ICU 4.2
   345	 */
   346	#define U_SHAPE_SEEN_TWOCELL_NEAR     0x200000
   347	
   348	/**
   349	 * Bit mask for Seen memory options. 
   350	 * @stable ICU 4.2
   351	 */
   352	#define U_SHAPE_SEEN_MASK             0x700000
   353	
   354	/* YehHamza option */ 
   355	/**
   356	 * Memory option: the result must have the same length as the source.
   357	 * Shaping mode: The YEHHAMZA character will expand into two characters using space near it
   358	 *               (i.e. the space after the character).
   359	 *               If no spaces are found, the error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
   360	 *               will be set in pErrorCode.
   361	 *
   362	 * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be
   363	 *                  replaced by one cell YehHamza and space will replace the Hamza.
   364	 * Affects: YehHamza options
   365	 * @stable ICU 4.2
   366	 */
   367	#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR      0x1000000
   368	
   369	
   370	/**
   371	 * Bit mask for YehHamza memory options. 
   372	 * @stable ICU 4.2
   373	 */
   374	#define U_SHAPE_YEHHAMZA_MASK              0x3800000
   375	
   376	/* New Tashkeel options */ 
   377	/**
   378	 * Memory option: the result must have the same length as the source.
   379	 * Shaping mode: Tashkeel characters will be replaced by spaces. 
   380	 *               Spaces will be placed at beginning of the buffer
   381	 *
   382	 * De-shaping mode: N/A
   383	 * Affects: Tashkeel options
   384	 * @stable ICU 4.2
   385	 */
   386	#define U_SHAPE_TASHKEEL_BEGIN                      0x40000
   387	
   388	/**
   389	 * Memory option: the result must have the same length as the source.
   390	 * Shaping mode: Tashkeel characters will be replaced by spaces. 
   391	 *               Spaces will be placed at end of the buffer
   392	 *
   393	 * De-shaping mode: N/A
   394	 * Affects: Tashkeel options
   395	 * @stable ICU 4.2
   396	 */
   397	#define U_SHAPE_TASHKEEL_END                        0x60000
   398	
   399	/**
   400	 * Memory option: allow the result to have a different length than the source.
   401	 * Shaping mode: Tashkeel characters will be removed, buffer length will shrink. 
   402	 * De-shaping mode: N/A 
   403	 *
   404	 * Affects: Tashkeel options
   405	 * @stable ICU 4.2
   406	 */
   407	#define U_SHAPE_TASHKEEL_RESIZE                     0x80000
   408	
   409	/**
   410	 * Memory option: the result must have the same length as the source.
   411	 * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent
   412	 *               characters (i.e. shaped on Tatweel) or replaced by space if it is not connected.
   413	 *
   414	 * De-shaping mode: N/A
   415	 * Affects: Tashkeel options
   416	 * @stable ICU 4.2
   417	 */
   418	#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL         0xC0000
   419	
   420	/** 
   421	 * Bit mask for Tashkeel replacement with Space or Tatweel memory options. 
   422	 * @stable ICU 4.2
   423	 */
   424	#define U_SHAPE_TASHKEEL_MASK                       0xE0000
   425	
   426	
   427	/* Space location Control options */ 
   428	/**
   429	 * This option affects the meaning of the BEGIN and END options. If this option is not used, the
   430	 * default meaning of BEGIN and END is as follows:
   431	 * The Default (for Visual LTR, Visual RTL and Logical Text)
   432	 *           1. BEGIN always refers to the start address of physical memory.
   433	 *           2. END always refers to the end address of physical memory.
   434	 *
   435	 * If this option is used, it will swap the meaning of BEGIN and END only for Visual LTR text.
   436	 *
   437	 * The effect on BEGIN and END Memory Options will be as follows:
   438	 *    A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text
   439	 *       (corresponding to the physical memory address end for Visual LTR text; same as END in
   440	 *       default behavior).
   441	 *    B. BEGIN For Logical text: Same as BEGIN in default behavior.
   442	 *    C. END For Visual LTR text: This will be the end (left side) of the visual text (corresponding
   443	 *       to the physical memory address beginning for Visual LTR text; same as BEGIN in default behavior).
   444	 *    D. END For Logical text: Same as END in default behavior.
   445	 * Affects: All LamAlef BEGIN, END and AUTO options.
   446	 * @stable ICU 4.2
   447	 */
   448	#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000
   449	
   450	/**
   451	 * Bit mask for swapping BEGIN and END for Visual LTR text 
   452	 * @stable ICU 4.2
   453	 */
   454	#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK      0x4000000
   455	
   456	/**
   457	 * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
   458	 * If this option is not specified (Default), the old unofficial Unicode TAIL code point is used (i.e. 0x200B).
   459	 * De-shaping will not use this option as it will always search for both the new Unicode code point for the
   460	 * TAIL (i.e. 0xFE73) and the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the
   461	 * Seen-Family letter accordingly.
   462	 *
   463	 * Shaping Mode: Only shaping.
   464	 * De-shaping Mode: N/A.
   465	 * Affects: All Seen options
   466	 * @stable ICU 4.8
   467	 */
   468	#define U_SHAPE_TAIL_NEW_UNICODE        0x8000000
   469	
   470	/**
   471	 * Bit mask for new Unicode Tail option 
   472	 * @stable ICU 4.8
   473	 */
   474	#define U_SHAPE_TAIL_TYPE_MASK          0x8000000
   475	
   476	#endif