/* xref: /PHP-5.3/ext/pcre/pcrelib/ucp.h (revision 357ab3cb) */
1 /*************************************************
2 *          Unicode Property Table handler        *
3 *************************************************/
4 
5 #ifndef _UCP_H
6 #define _UCP_H
7 
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be added at the end of each enum, for backwards compatibility.

IMPORTANT: The specific numeric values of the enums must also be the same as
the values generated by the maint/MultiStage2.py script, where the
corresponding descriptive property names are listed in vectors. */
15 
/* These are the general character categories. Each value is written out
explicitly because it has to match the number generated by the
maint/MultiStage2.py script; any new category must be appended at the end. */

enum {
  ucp_C = 0,  /* Other */
  ucp_L = 1,  /* Letter */
  ucp_M = 2,  /* Mark */
  ucp_N = 3,  /* Number */
  ucp_P = 4,  /* Punctuation */
  ucp_S = 5,  /* Symbol */
  ucp_Z = 6   /* Separator */
};
27 
/* These are the particular character categories. Each value is written out
explicitly because it has to match the number generated by the
maint/MultiStage2.py script; any new category must be appended at the end. */

enum {
  ucp_Cc = 0,   /* Control */
  ucp_Cf = 1,   /* Format */
  ucp_Cn = 2,   /* Unassigned */
  ucp_Co = 3,   /* Private use */
  ucp_Cs = 4,   /* Surrogate */
  ucp_Ll = 5,   /* Lower case letter */
  ucp_Lm = 6,   /* Modifier letter */
  ucp_Lo = 7,   /* Other letter */
  ucp_Lt = 8,   /* Title case letter */
  ucp_Lu = 9,   /* Upper case letter */
  ucp_Mc = 10,  /* Spacing mark */
  ucp_Me = 11,  /* Enclosing mark */
  ucp_Mn = 12,  /* Non-spacing mark */
  ucp_Nd = 13,  /* Decimal number */
  ucp_Nl = 14,  /* Letter number */
  ucp_No = 15,  /* Other number */
  ucp_Pc = 16,  /* Connector punctuation */
  ucp_Pd = 17,  /* Dash punctuation */
  ucp_Pe = 18,  /* Close punctuation */
  ucp_Pf = 19,  /* Final punctuation */
  ucp_Pi = 20,  /* Initial punctuation */
  ucp_Po = 21,  /* Other punctuation */
  ucp_Ps = 22,  /* Open punctuation */
  ucp_Sc = 23,  /* Currency symbol */
  ucp_Sk = 24,  /* Modifier symbol */
  ucp_Sm = 25,  /* Mathematical symbol */
  ucp_So = 26,  /* Other symbol */
  ucp_Zl = 27,  /* Line separator */
  ucp_Zp = 28,  /* Paragraph separator */
  ucp_Zs = 29   /* Space separator */
};
62 
/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. The values are
written out explicitly, and the limit is checked at compile time below. */

enum {
  ucp_gbCR                = 0,
  ucp_gbLF                = 1,
  ucp_gbControl           = 2,
  ucp_gbExtend            = 3,
  ucp_gbPrepend           = 4,
  ucp_gbSpacingMark       = 5,
  ucp_gbL                 = 6,   /* Hangul syllable type L */
  ucp_gbV                 = 7,   /* Hangul syllable type V */
  ucp_gbT                 = 8,   /* Hangul syllable type T */
  ucp_gbLV                = 9,   /* Hangul syllable type LV */
  ucp_gbLVT               = 10,  /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther             = 12   /* Must remain the largest value */
};

/* Compile-time guard for the "less than 16" rule: the array size becomes
negative, which is a compile error, if the largest value ever reaches 16.
This relies on ucp_gbOther staying the final (largest) enumerator. */

typedef char ucp_gb_values_below_16[(ucp_gbOther < 16) ? 1 : -1];
82 
/* These are the script identifications. Each value is written out explicitly
because it has to match the number generated by the maint/MultiStage2.py
script. Scripts introduced by a later Unicode release are appended at the end,
grouped under the release that added them; never insert into the middle. */

enum {
  ucp_Arabic = 0,
  ucp_Armenian = 1,
  ucp_Bengali = 2,
  ucp_Bopomofo = 3,
  ucp_Braille = 4,
  ucp_Buginese = 5,
  ucp_Buhid = 6,
  ucp_Canadian_Aboriginal = 7,
  ucp_Cherokee = 8,
  ucp_Common = 9,
  ucp_Coptic = 10,
  ucp_Cypriot = 11,
  ucp_Cyrillic = 12,
  ucp_Deseret = 13,
  ucp_Devanagari = 14,
  ucp_Ethiopic = 15,
  ucp_Georgian = 16,
  ucp_Glagolitic = 17,
  ucp_Gothic = 18,
  ucp_Greek = 19,
  ucp_Gujarati = 20,
  ucp_Gurmukhi = 21,
  ucp_Han = 22,
  ucp_Hangul = 23,
  ucp_Hanunoo = 24,
  ucp_Hebrew = 25,
  ucp_Hiragana = 26,
  ucp_Inherited = 27,
  ucp_Kannada = 28,
  ucp_Katakana = 29,
  ucp_Kharoshthi = 30,
  ucp_Khmer = 31,
  ucp_Lao = 32,
  ucp_Latin = 33,
  ucp_Limbu = 34,
  ucp_Linear_B = 35,
  ucp_Malayalam = 36,
  ucp_Mongolian = 37,
  ucp_Myanmar = 38,
  ucp_New_Tai_Lue = 39,
  ucp_Ogham = 40,
  ucp_Old_Italic = 41,
  ucp_Old_Persian = 42,
  ucp_Oriya = 43,
  ucp_Osmanya = 44,
  ucp_Runic = 45,
  ucp_Shavian = 46,
  ucp_Sinhala = 47,
  ucp_Syloti_Nagri = 48,
  ucp_Syriac = 49,
  ucp_Tagalog = 50,
  ucp_Tagbanwa = 51,
  ucp_Tai_Le = 52,
  ucp_Tamil = 53,
  ucp_Telugu = 54,
  ucp_Thaana = 55,
  ucp_Thai = 56,
  ucp_Tibetan = 57,
  ucp_Tifinagh = 58,
  ucp_Ugaritic = 59,
  ucp_Yi = 60,
  /* New for Unicode 5.0: */
  ucp_Balinese = 61,
  ucp_Cuneiform = 62,
  ucp_Nko = 63,
  ucp_Phags_Pa = 64,
  ucp_Phoenician = 65,
  /* New for Unicode 5.1: */
  ucp_Carian = 66,
  ucp_Cham = 67,
  ucp_Kayah_Li = 68,
  ucp_Lepcha = 69,
  ucp_Lycian = 70,
  ucp_Lydian = 71,
  ucp_Ol_Chiki = 72,
  ucp_Rejang = 73,
  ucp_Saurashtra = 74,
  ucp_Sundanese = 75,
  ucp_Vai = 76,
  /* New for Unicode 5.2: */
  ucp_Avestan = 77,
  ucp_Bamum = 78,
  ucp_Egyptian_Hieroglyphs = 79,
  ucp_Imperial_Aramaic = 80,
  ucp_Inscriptional_Pahlavi = 81,
  ucp_Inscriptional_Parthian = 82,
  ucp_Javanese = 83,
  ucp_Kaithi = 84,
  ucp_Lisu = 85,
  ucp_Meetei_Mayek = 86,
  ucp_Old_South_Arabian = 87,
  ucp_Old_Turkic = 88,
  ucp_Samaritan = 89,
  ucp_Tai_Tham = 90,
  ucp_Tai_Viet = 91,
  /* New for Unicode 6.0.0: */
  ucp_Batak = 92,
  ucp_Brahmi = 93,
  ucp_Mandaic = 94,
  /* New for Unicode 6.1.0: */
  ucp_Chakma = 95,
  ucp_Meroitic_Cursive = 96,
  ucp_Meroitic_Hieroglyphs = 97,
  ucp_Miao = 98,
  ucp_Sharada = 99,
  ucp_Sora_Sompeng = 100,
  ucp_Takri = 101
};
194 
195 #endif
196 
197 /* End of ucp.h */
198