1/-- These tests for Unicode property support test PCRE's API and show some of 2 the compiled code. They are not Perl-compatible. --/ 3 4/[\p{L}]/DZ 5 6/[\p{^L}]/DZ 7 8/[\P{L}]/DZ 9 10/[\P{^L}]/DZ 11 12/[abc\p{L}\x{0660}]/8DZ 13 14/[\p{Nd}]/8DZ 15 1234 16 17/[\p{Nd}+-]+/8DZ 18 1234 19 12-34 20 12+\x{661}-34 21 ** Failers 22 abcd 23 24/[\x{105}-\x{109}]/8iDZ 25 \x{104} 26 \x{105} 27 \x{109} 28 ** Failers 29 \x{100} 30 \x{10a} 31 32/[z-\x{100}]/8iDZ 33 Z 34 z 35 \x{39c} 36 \x{178} 37 | 38 \x{80} 39 \x{ff} 40 \x{100} 41 \x{101} 42 ** Failers 43 \x{102} 44 Y 45 y 46 47/[z-\x{100}]/8DZi 48 49/(?:[\PPa*]*){8,}/ 50 51/[\P{Any}]/BZ 52 53/[\P{Any}\E]/BZ 54 55/(\P{Yi}+\277)/ 56 57/(\P{Yi}+\277)?/ 58 59/(?<=\P{Yi}{3}A)X/ 60 61/\p{Yi}+(\P{Yi}+)(?1)/ 62 63/(\P{Yi}{2}\277)?/ 64 65/[\P{Yi}A]/ 66 67/[\P{Yi}\P{Yi}\P{Yi}A]/ 68 69/[^\P{Yi}A]/ 70 71/[^\P{Yi}\P{Yi}\P{Yi}A]/ 72 73/(\P{Yi}*\277)*/ 74 75/(\P{Yi}*?\277)*/ 76 77/(\p{Yi}*+\277)*/ 78 79/(\P{Yi}?\277)*/ 80 81/(\P{Yi}??\277)*/ 82 83/(\p{Yi}?+\277)*/ 84 85/(\P{Yi}{0,3}\277)*/ 86 87/(\P{Yi}{0,3}?\277)*/ 88 89/(\p{Yi}{0,3}+\277)*/ 90 91/\p{Zl}{2,3}+/8BZ 92 93 \x{2028}\x{2028}\x{2028} 94 95/\p{Zl}/8BZ 96 97/\p{Lu}{3}+/8BZ 98 99/\pL{2}+/8BZ 100 101/\p{Cc}{2}+/8BZ 102 103/^\p{Cs}/8 104 \?\x{dfff} 105 ** Failers 106 \x{09f} 107 108/^\p{Sc}+/8 109 $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} 110 \x{9f2} 111 ** Failers 112 X 113 \x{2c2} 114 115/^\p{Zs}/8 116 \ \ 117 \x{a0} 118 \x{1680} 119 \x{180e} 120 \x{2000} 121 \x{2001} 122 ** Failers 123 \x{2028} 124 \x{200d} 125 126/-- These four are here rather than in test 6 because Perl has problems with 127 the negative versions of the properties. --/ 128 129/\p{^Lu}/8i 130 1234 131 ** Failers 132 ABC 133 134/\P{Lu}/8i 135 1234 136 ** Failers 137 ABC 138 139/\p{Ll}/8i 140 a 141 Az 142 ** Failers 143 ABC 144 145/\p{Lu}/8i 146 A 147 a\x{10a0}B 148 ** Failers 149 a 150 \x{1d00} 151 152/[\x{c0}\x{391}]/8i 153 \x{c0} 154 \x{e0} 155 156/-- The next two are special cases where the lengths of the different cases of 157the same character differ. The first went wrong with heap frame storage; the 158second was broken in all cases. --/ 159 160/^\x{023a}+?(\x{0130}+)/8i 161 \x{023a}\x{2c65}\x{0130} 162 163/^\x{023a}+([^X])/8i 164 \x{023a}\x{2c65}X 165 166/\x{c0}+\x{116}+/8i 167 \x{c0}\x{e0}\x{116}\x{117} 168 169/[\x{c0}\x{116}]+/8i 170 \x{c0}\x{e0}\x{116}\x{117} 171 172/(\x{de})\1/8i 173 \x{de}\x{de} 174 \x{de}\x{fe} 175 \x{fe}\x{fe} 176 \x{fe}\x{de} 177 178/^\x{c0}$/8i 179 \x{c0} 180 \x{e0} 181 182/^\x{e0}$/8i 183 \x{c0} 184 \x{e0} 185 186/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE 187will match it only with UCP support, because without that it has no notion 188of case for anything other than the ASCII letters. --/ 189 190/((?i)[\x{c0}])/8 191 \x{c0} 192 \x{e0} 193 194/(?i:[\x{c0}])/8 195 \x{c0} 196 \x{e0} 197 198/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/ 199 200/^\p{Xan}/8 201 ABCD 202 1234 203 \x{6ca} 204 \x{a6c} 205 \x{10a7} 206 ** Failers 207 _ABC 208 209/^\p{Xan}+/8 210 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 211 ** Failers 212 _ABC 213 214/^\p{Xan}+?/8 215 \x{6ca}\x{a6c}\x{10a7}_ 216 217/^\p{Xan}*/8 218 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 219 220/^\p{Xan}{2,9}/8 221 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 222 223/^\p{Xan}{2,9}?/8 224 \x{6ca}\x{a6c}\x{10a7}_ 225 226/^[\p{Xan}]/8 227 ABCD1234_ 228 1234abcd_ 229 \x{6ca} 230 \x{a6c} 231 \x{10a7} 232 ** Failers 233 _ABC 234 235/^[\p{Xan}]+/8 236 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 237 ** Failers 238 _ABC 239 240/^>\p{Xsp}/8 241 >\x{1680}\x{2028}\x{0b} 242 >\x{a0} 243 ** Failers 244 \x{0b} 245 246/^>\p{Xsp}+/8 247 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 248 249/^>\p{Xsp}+?/8 250 >\x{1680}\x{2028}\x{0b} 251 252/^>\p{Xsp}*/8 253 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 254 255/^>\p{Xsp}{2,9}/8 256 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 257 258/^>\p{Xsp}{2,9}?/8 259 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 260 261/^>[\p{Xsp}]/8 262 >\x{2028}\x{0b} 263 264/^>[\p{Xsp}]+/8 265 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 266 267/^>\p{Xps}/8 268 >\x{1680}\x{2028}\x{0b} 269 >\x{a0} 270 ** Failers 271 \x{0b} 272 273/^>\p{Xps}+/8 274 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 275 276/^>\p{Xps}+?/8 277 >\x{1680}\x{2028}\x{0b} 278 279/^>\p{Xps}*/8 280 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 281 282/^>\p{Xps}{2,9}/8 283 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 284 285/^>\p{Xps}{2,9}?/8 286 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 287 288/^>[\p{Xps}]/8 289 >\x{2028}\x{0b} 290 291/^>[\p{Xps}]+/8 292 > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 293 294/^\p{Xwd}/8 295 ABCD 296 1234 297 \x{6ca} 298 \x{a6c} 299 \x{10a7} 300 _ABC 301 ** Failers 302 [] 303 304/^\p{Xwd}+/8 305 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 306 307/^\p{Xwd}+?/8 308 \x{6ca}\x{a6c}\x{10a7}_ 309 310/^\p{Xwd}*/8 311 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 312 313/^\p{Xwd}{2,9}/8 314 A_B12\x{6ca}\x{a6c}\x{10a7} 315 316/^\p{Xwd}{2,9}?/8 317 \x{6ca}\x{a6c}\x{10a7}_ 318 319/^[\p{Xwd}]/8 320 ABCD1234_ 321 1234abcd_ 322 \x{6ca} 323 \x{a6c} 324 \x{10a7} 325 _ABC 326 ** Failers 327 [] 328 329/^[\p{Xwd}]+/8 330 ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 331 332/-- A check not in UTF-8 mode --/ 333 334/^[\p{Xwd}]+/ 335 ABCD1234_ 336 337/-- Some negative checks --/ 338 339/^[\P{Xwd}]+/8 340 !.+\x{019}\x{35a}AB 341 342/^[\p{^Xwd}]+/8 343 !.+\x{019}\x{35a}AB 344 345/[\D]/WBZ8 346 1\x{3c8}2 347 348/[\d]/WBZ8 349 >\x{6f4}< 350 351/[\S]/WBZ8 352 \x{1680}\x{6f4}\x{1680} 353 354/[\s]/WBZ8 355 >\x{1680}< 356 357/[\W]/WBZ8 358 A\x{1712}B 359 360/[\w]/WBZ8 361 >\x{1723}< 362 363/\D/WBZ8 364 1\x{3c8}2 365 366/\d/WBZ8 367 >\x{6f4}< 368 369/\S/WBZ8 370 \x{1680}\x{6f4}\x{1680} 371 372/\s/WBZ8 373 >\x{1680}> 374 375/\W/WBZ8 376 A\x{1712}B 377 378/\w/WBZ8 379 >\x{1723}< 380 381/[[:alpha:]]/WBZ 382 383/[[:lower:]]/WBZ 384 385/[[:upper:]]/WBZ 386 387/[[:alnum:]]/WBZ 388 389/[[:ascii:]]/WBZ 390 391/[[:cntrl:]]/WBZ 392 393/[[:digit:]]/WBZ 394 395/[[:graph:]]/WBZ 396 397/[[:print:]]/WBZ 398 399/[[:punct:]]/WBZ 400 401/[[:space:]]/WBZ 402 403/[[:word:]]/WBZ 404 405/[[:xdigit:]]/WBZ 406 407/-- Unicode properties for \b abd \B --/ 408 409/\b...\B/8W 410 abc_ 411 \x{37e}abc\x{376} 412 \x{37e}\x{376}\x{371}\x{393}\x{394} 413 !\x{c0}++\x{c1}\x{c2} 414 !\x{c0}+++++ 415 416/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ 417 418/\b...\B/8 419 abc_ 420 ** Failers 421 \x{37e}abc\x{376} 422 \x{37e}\x{376}\x{371}\x{393}\x{394} 423 !\x{c0}++\x{c1}\x{c2} 424 !\x{c0}+++++ 425 426/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ 427 428/\b...\B/W 429 abc_ 430 !\x{c0}++\x{c1}\x{c2} 431 !\x{c0}+++++ 432 433/-- Some of these are silly, but they check various combinations --/ 434 435/[[:^alpha:][:^cntrl:]]+/8WBZ 436 123 437 abc 438 439/[[:^cntrl:][:^alpha:]]+/8WBZ 440 123 441 abc 442 443/[[:alpha:]]+/8WBZ 444 abc 445 446/[[:^alpha:]\S]+/8WBZ 447 123 448 abc 449 450/[^\d]+/8WBZ 451 abc123 452 abc\x{123} 453 \x{660}abc 454 455/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ 456 457/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ 458 459/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ 460 461/\p{Han}+X\p{Greek}+\x{370}/BZ8 462 463/\p{Xan}+!\p{Xan}+A/BZ 464 465/\p{Xsp}+!\p{Xsp}\t/BZ 466 467/\p{Xps}+!\p{Xps}\t/BZ 468 469/\p{Xwd}+!\p{Xwd}_/BZ 470 471/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ 472 473/-- These behaved oddly in Perl, so they are kept in this test --/ 474 475/(\x{23a}\x{23a}\x{23a})?\1/8i 476 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} 477 478/(ȺȺȺ)?\1/8i 479 ȺȺȺⱥⱥ 480 481/(\x{23a}\x{23a}\x{23a})?\1/8i 482 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 483 484/(ȺȺȺ)?\1/8i 485 ȺȺȺⱥⱥⱥ 486 487/(\x{23a}\x{23a}\x{23a})\1/8i 488 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} 489 490/(ȺȺȺ)\1/8i 491 ȺȺȺⱥⱥ 492 493/(\x{23a}\x{23a}\x{23a})\1/8i 494 \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 495 496/(ȺȺȺ)\1/8i 497 ȺȺȺⱥⱥⱥ 498 499/(\x{2c65}\x{2c65})\1/8i 500 \x{2c65}\x{2c65}\x{23a}\x{23a} 501 502/(ⱥⱥ)\1/8i 503 ⱥⱥȺȺ 504 505/(\x{23a}\x{23a}\x{23a})\1Y/8i 506 X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ 507 508/(\x{2c65}\x{2c65})\1Y/8i 509 X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ 510 511/-- --/ 512 513/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ 514 515/^[\p{Batak}]/8 516 \x{1bc0} 517 \x{1bff} 518 ** Failers 519 \x{1bf4} 520 521/^[\p{Brahmi}]/8 522 \x{11000} 523 \x{1106f} 524 ** Failers 525 \x{1104e} 526 527/^[\p{Mandaic}]/8 528 \x{840} 529 \x{85e} 530 ** Failers 531 \x{85c} 532 \x{85d} 533 534/-- --/ 535 536/(\X*)(.)/s8 537 A\x{300} 538 539/^S(\X*)e(\X*)$/8 540 Stéréo 541 542/^\X/8 543 ́réo 544 545/^a\X41z/<JS> 546 aX41z 547 *** Failers 548 aAz 549 550/(?<=ab\Cde)X/8 551 552/\X/ 553 a\P 554 a\P\P 555 556/\Xa/ 557 aa\P 558 aa\P\P 559 560/\X{2}/ 561 aa\P 562 aa\P\P 563 564/\X+a/ 565 a\P 566 aa\P 567 aa\P\P 568 569/\X+?a/ 570 a\P 571 ab\P 572 aa\P 573 aa\P\P 574 aba\P 575 576/-- These Unicode 6.1.0 scripts are not known to Perl. --/ 577 578/\p{Chakma}\d/8W 579 \x{11100}\x{1113c} 580 581/\p{Takri}\d/8W 582 \x{11680}\x{116c0} 583 584/^\X/8 585 A\P 586 A\P\P 587 A\x{300}\x{301}\P 588 A\x{300}\x{301}\P\P 589 A\x{301}\P 590 A\x{301}\P\P 591 592/^\X{2,3}/8 593 A\P 594 A\P\P 595 AA\P 596 AA\P\P 597 A\x{300}\x{301}\P 598 A\x{300}\x{301}\P\P 599 A\x{300}\x{301}A\x{300}\x{301}\P 600 A\x{300}\x{301}A\x{300}\x{301}\P\P 601 602/^\X{2}/8 603 AA\P 604 AA\P\P 605 A\x{300}\x{301}A\x{300}\x{301}\P 606 A\x{300}\x{301}A\x{300}\x{301}\P\P 607 608/^\X+/8 609 AA\P 610 AA\P\P 611 612/^\X+?Z/8 613 AA\P 614 AA\P\P 615 616/A\x{3a3}B/8iDZ 617 618/\x{3a3}B/8iDZ 619 620/[\x{3a3}]/8iBZ 621 622/[^\x{3a3}]/8iBZ 623 624/[\x{3a3}]+/8iBZ 625 626/[^\x{3a3}]+/8iBZ 627 628/a*\x{3a3}/8iBZ 629 630/\x{3a3}+a/8iBZ 631 632/\x{3a3}*\x{3c2}/8iBZ 633 634/\x{3a3}{3}/8i+ 635 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 636 637/\x{3a3}{2,4}/8i+ 638 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 639 640/\x{3a3}{2,4}?/8i+ 641 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 642 643/\x{3a3}+./8i+ 644 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 645 646/\x{3a3}++./8i+ 647 ** Failers 648 \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 649 650/\x{3a3}*\x{3c2}/8iBZ 651 652/[^\x{3a3}]*\x{3c2}/8iBZ 653 654/[^a]*\x{3c2}/8iBZ 655 656/ist/8iBZ 657 ikt 658 659/is+t/8i 660 iSs\x{17f}t 661 ikt 662 663/is+?t/8i 664 ikt 665 666/is?t/8i 667 ikt 668 669/is{2}t/8i 670 iskt 671 672/-- End of testinput7 --/ 673