/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2022 University of Cambridge

This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
a new version of this code.

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* Definitions of the Unicode property values that are returned by the UCD
access macros and used throughout PCRE2.

IMPORTANT: the table called catposstab in pcre2_auto_possess.c assumes the
specific numeric values of the first two enums below (the general and the
particular character categories). Those values are therefore spelled out
explicitly here. They are unlikely to change, but should be checked after an
update. */

/* General character categories. Each one groups the particular categories
whose two-letter names start with the same letter. */

enum {
  ucp_C = 0,    /* Cc, Cf, Cn, Co, Cs */
  ucp_L = 1,    /* Letters */
  ucp_M = 2,    /* Marks */
  ucp_N = 3,    /* Numbers */
  ucp_P = 4,    /* Punctuation */
  ucp_S = 5,    /* Symbols */
  ucp_Z = 6     /* Separators */
};

/* Particular character categories. */

enum {
  ucp_Cc = 0,   /* Control */
  ucp_Cf = 1,   /* Format */
  ucp_Cn = 2,   /* Unassigned */
  ucp_Co = 3,   /* Private use */
  ucp_Cs = 4,   /* Surrogate */
  ucp_Ll = 5,   /* Lower case letter */
  ucp_Lm = 6,   /* Modifier letter */
  ucp_Lo = 7,   /* Other letter */
  ucp_Lt = 8,   /* Title case letter */
  ucp_Lu = 9,   /* Upper case letter */
  ucp_Mc = 10,  /* Spacing mark */
  ucp_Me = 11,  /* Enclosing mark */
  ucp_Mn = 12,  /* Non-spacing mark */
  ucp_Nd = 13,  /* Decimal number */
  ucp_Nl = 14,  /* Letter number */
  ucp_No = 15,  /* Other number */
  ucp_Pc = 16,  /* Connector punctuation */
  ucp_Pd = 17,  /* Dash punctuation */
  ucp_Pe = 18,  /* Close punctuation */
  ucp_Pf = 19,  /* Final punctuation */
  ucp_Pi = 20,  /* Initial punctuation */
  ucp_Po = 21,  /* Other punctuation */
  ucp_Ps = 22,  /* Open punctuation */
  ucp_Sc = 23,  /* Currency symbol */
  ucp_Sk = 24,  /* Modifier symbol */
  ucp_Sm = 25,  /* Mathematical symbol */
  ucp_So = 26,  /* Other symbol */
  ucp_Zl = 27,  /* Line separator */
  ucp_Zp = 28,  /* Paragraph separator */
  ucp_Zs = 29   /* Space separator */
};

/* Boolean properties. Values are consecutive, starting at zero; the final
enumerator is a sentinel, not a property. */

enum {
  ucp_ASCII = 0,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,

  /* Sentinel: must stay last, so that it equals the number of Boolean
  properties listed above. */
  ucp_Bprop_Count
};

/* Size of entries in ucd_boolprop_sets[] */

#define ucd_boolprop_sets_item_size 2

/* Bidi class values. */

enum {
  ucp_bidiAL = 0,   /* Arabic letter */
  ucp_bidiAN,       /* Arabic number */
  ucp_bidiB,        /* Paragraph separator */
  ucp_bidiBN,       /* Boundary neutral */
  ucp_bidiCS,       /* Common separator */
  ucp_bidiEN,       /* European number */
  ucp_bidiES,       /* European separator */
  ucp_bidiET,       /* European terminator */
  ucp_bidiFSI,      /* First strong isolate */
  ucp_bidiL,        /* Left to right */
  ucp_bidiLRE,      /* Left to right embedding */
  ucp_bidiLRI,      /* Left to right isolate */
  ucp_bidiLRO,      /* Left to right override */
  ucp_bidiNSM,      /* Non-spacing mark */
  ucp_bidiON,       /* Other neutral */
  ucp_bidiPDF,      /* Pop directional format */
  ucp_bidiPDI,      /* Pop directional isolate */
  ucp_bidiR,        /* Right to left */
  ucp_bidiRLE,      /* Right to left embedding */
  ucp_bidiRLI,      /* Right to left isolate */
  ucp_bidiRLO,      /* Right to left override */
  ucp_bidiS,        /* Segment separator */
  ucp_bidiWS        /* White space */
};

/* Grapheme break properties. The Extended Pictographic property comes from
the emoji-data.txt file. */

enum {
  ucp_gbCR                    = 0,
  ucp_gbLF                    = 1,
  ucp_gbControl               = 2,
  ucp_gbExtend                = 3,
  ucp_gbPrepend               = 4,
  ucp_gbSpacingMark           = 5,
  ucp_gbL                     = 6,   /* Hangul syllable type L */
  ucp_gbV                     = 7,   /* Hangul syllable type V */
  ucp_gbT                     = 8,   /* Hangul syllable type T */
  ucp_gbLV                    = 9,   /* Hangul syllable type LV */
  ucp_gbLVT                   = 10,  /* Hangul syllable type LVT */
  ucp_gbRegional_Indicator    = 11,
  ucp_gbOther                 = 12,
  ucp_gbZWJ                   = 13,
  ucp_gbExtended_Pictographic = 14
};

/* Script identifications. The list is split into two consecutive groups; the
final enumerator is a sentinel, not a script. */

enum {
  /* Scripts that have characters in other scripts. */
  ucp_Latin = 0,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Sharada,
  ucp_Takri,
  ucp_Duployan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Adlam,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,

  /* Scripts that have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Thai,
  ucp_Lao,
  ucp_Tibetan,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Runic,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Gothic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Shavian,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Tifinagh,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Avestan,
  ucp_Egyptian_Hieroglyphs,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Old_Turkic,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Caucasian_Albanian,
  ucp_Bassa_Vah,
  ucp_Elbasan,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Toto,
  ucp_Vithkuqi,

  /* Sentinel: must stay last, so that it equals the number of scripts listed
  above (both groups together). */
  ucp_Script_Count
};

/* Size of entries in ucd_script_sets[] */

#define ucd_script_sets_item_size 3

#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

/* End of pcre2_ucp.h */