xref: /PHP-5.3/ext/pcre/pcrelib/testdata/testinput7 (revision 357ab3cb)
1/-- These tests for Unicode property support test PCRE's API and show some of
2    the compiled code. They are not Perl-compatible. --/
3
4/[\p{L}]/DZ
5
6/[\p{^L}]/DZ
7
8/[\P{L}]/DZ
9
10/[\P{^L}]/DZ
11
12/[abc\p{L}\x{0660}]/8DZ
13
14/[\p{Nd}]/8DZ
15    1234
16
17/[\p{Nd}+-]+/8DZ
18    1234
19    12-34
20    12+\x{661}-34
21    ** Failers
22    abcd
23
24/[\x{105}-\x{109}]/8iDZ
25    \x{104}
26    \x{105}
27    \x{109}
28    ** Failers
29    \x{100}
30    \x{10a}
31
32/[z-\x{100}]/8iDZ
33    Z
34    z
35    \x{39c}
36    \x{178}
37    |
38    \x{80}
39    \x{ff}
40    \x{100}
41    \x{101}
42    ** Failers
43    \x{102}
44    Y
45    y
46
47/[z-\x{100}]/8DZi
48
49/(?:[\PPa*]*){8,}/
50
51/[\P{Any}]/BZ
52
53/[\P{Any}\E]/BZ
54
55/(\P{Yi}+\277)/
56
57/(\P{Yi}+\277)?/
58
59/(?<=\P{Yi}{3}A)X/
60
61/\p{Yi}+(\P{Yi}+)(?1)/
62
63/(\P{Yi}{2}\277)?/
64
65/[\P{Yi}A]/
66
67/[\P{Yi}\P{Yi}\P{Yi}A]/
68
69/[^\P{Yi}A]/
70
71/[^\P{Yi}\P{Yi}\P{Yi}A]/
72
73/(\P{Yi}*\277)*/
74
75/(\P{Yi}*?\277)*/
76
77/(\p{Yi}*+\277)*/
78
79/(\P{Yi}?\277)*/
80
81/(\P{Yi}??\277)*/
82
83/(\p{Yi}?+\277)*/
84
85/(\P{Yi}{0,3}\277)*/
86
87/(\P{Yi}{0,3}?\277)*/
88
89/(\p{Yi}{0,3}+\277)*/
90
91/\p{Zl}{2,3}+/8BZ
92    \x{2028}\x{2028}
93    \x{2028}\x{2028}\x{2028}
94
95/\p{Zl}/8BZ
96
97/\p{Lu}{3}+/8BZ
98
99/\pL{2}+/8BZ
100
101/\p{Cc}{2}+/8BZ
102
103/^\p{Cs}/8
104    \?\x{dfff}
105    ** Failers
106    \x{09f}
107
108/^\p{Sc}+/8
109    $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
110    \x{9f2}
111    ** Failers
112    X
113    \x{2c2}
114
115/^\p{Zs}/8
116    \ \
117    \x{a0}
118    \x{1680}
119    \x{180e}
120    \x{2000}
121    \x{2001}
122    ** Failers
123    \x{2028}
124    \x{200d}
125
126/-- These four are here rather than in test 6 because Perl has problems with
127    the negative versions of the properties. --/
128
129/\p{^Lu}/8i
130    1234
131    ** Failers
132    ABC
133
134/\P{Lu}/8i
135    1234
136    ** Failers
137    ABC
138
139/\p{Ll}/8i
140    a
141    Az
142    ** Failers
143    ABC
144
145/\p{Lu}/8i
146    A
147    a\x{10a0}B
148    ** Failers
149    a
150    \x{1d00}
151
152/[\x{c0}\x{391}]/8i
153    \x{c0}
154    \x{e0}
155
156/-- The next two are special cases where the lengths of the different cases of
157the same character differ. The first went wrong with heap frame storage; the
158second was broken in all cases. --/
159
160/^\x{023a}+?(\x{0130}+)/8i
161  \x{023a}\x{2c65}\x{0130}
162
163/^\x{023a}+([^X])/8i
164  \x{023a}\x{2c65}X
165
166/\x{c0}+\x{116}+/8i
167    \x{c0}\x{e0}\x{116}\x{117}
168
169/[\x{c0}\x{116}]+/8i
170    \x{c0}\x{e0}\x{116}\x{117}
171
172/(\x{de})\1/8i
173    \x{de}\x{de}
174    \x{de}\x{fe}
175    \x{fe}\x{fe}
176    \x{fe}\x{de}
177
178/^\x{c0}$/8i
179    \x{c0}
180    \x{e0}
181
182/^\x{e0}$/8i
183    \x{c0}
184    \x{e0}
185
186/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
187will match it only with UCP support, because without that it has no notion
188of case for anything other than the ASCII letters. --/
189
190/((?i)[\x{c0}])/8
191    \x{c0}
192    \x{e0}
193
194/(?i:[\x{c0}])/8
195    \x{c0}
196    \x{e0}
197
198/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/
199
200/^\p{Xan}/8
201    ABCD
202    1234
203    \x{6ca}
204    \x{a6c}
205    \x{10a7}
206    ** Failers
207    _ABC
208
209/^\p{Xan}+/8
210    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
211    ** Failers
212    _ABC
213
214/^\p{Xan}+?/8
215    \x{6ca}\x{a6c}\x{10a7}_
216
217/^\p{Xan}*/8
218    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
219
220/^\p{Xan}{2,9}/8
221    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
222
223/^\p{Xan}{2,9}?/8
224    \x{6ca}\x{a6c}\x{10a7}_
225
226/^[\p{Xan}]/8
227    ABCD1234_
228    1234abcd_
229    \x{6ca}
230    \x{a6c}
231    \x{10a7}
232    ** Failers
233    _ABC
234
235/^[\p{Xan}]+/8
236    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
237    ** Failers
238    _ABC
239
240/^>\p{Xsp}/8
241    >\x{1680}\x{2028}\x{0b}
242    >\x{a0}
243    ** Failers
244    \x{0b}
245
246/^>\p{Xsp}+/8
247    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
248
249/^>\p{Xsp}+?/8
250    >\x{1680}\x{2028}\x{0b}
251
252/^>\p{Xsp}*/8
253    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
254
255/^>\p{Xsp}{2,9}/8
256    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
257
258/^>\p{Xsp}{2,9}?/8
259    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
260
261/^>[\p{Xsp}]/8
262    >\x{2028}\x{0b}
263
264/^>[\p{Xsp}]+/8
265    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
266
267/^>\p{Xps}/8
268    >\x{1680}\x{2028}\x{0b}
269    >\x{a0}
270    ** Failers
271    \x{0b}
272
273/^>\p{Xps}+/8
274    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
275
276/^>\p{Xps}+?/8
277    >\x{1680}\x{2028}\x{0b}
278
279/^>\p{Xps}*/8
280    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
281
282/^>\p{Xps}{2,9}/8
283    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
284
285/^>\p{Xps}{2,9}?/8
286    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
287
288/^>[\p{Xps}]/8
289    >\x{2028}\x{0b}
290
291/^>[\p{Xps}]+/8
292    > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
293
294/^\p{Xwd}/8
295    ABCD
296    1234
297    \x{6ca}
298    \x{a6c}
299    \x{10a7}
300    _ABC
301    ** Failers
302    []
303
304/^\p{Xwd}+/8
305    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
306
307/^\p{Xwd}+?/8
308    \x{6ca}\x{a6c}\x{10a7}_
309
310/^\p{Xwd}*/8
311    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
312
313/^\p{Xwd}{2,9}/8
314    A_B12\x{6ca}\x{a6c}\x{10a7}
315
316/^\p{Xwd}{2,9}?/8
317    \x{6ca}\x{a6c}\x{10a7}_
318
319/^[\p{Xwd}]/8
320    ABCD1234_
321    1234abcd_
322    \x{6ca}
323    \x{a6c}
324    \x{10a7}
325    _ABC
326    ** Failers
327    []
328
329/^[\p{Xwd}]+/8
330    ABCD1234\x{6ca}\x{a6c}\x{10a7}_
331
332/-- A check not in UTF-8 mode --/
333
334/^[\p{Xwd}]+/
335    ABCD1234_
336
337/-- Some negative checks --/
338
339/^[\P{Xwd}]+/8
340    !.+\x{019}\x{35a}AB
341
342/^[\p{^Xwd}]+/8
343    !.+\x{019}\x{35a}AB
344
345/[\D]/WBZ8
346    1\x{3c8}2
347
348/[\d]/WBZ8
349    >\x{6f4}<
350
351/[\S]/WBZ8
352    \x{1680}\x{6f4}\x{1680}
353
354/[\s]/WBZ8
355    >\x{1680}<
356
357/[\W]/WBZ8
358    A\x{1712}B
359
360/[\w]/WBZ8
361    >\x{1723}<
362
363/\D/WBZ8
364    1\x{3c8}2
365
366/\d/WBZ8
367    >\x{6f4}<
368
369/\S/WBZ8
370    \x{1680}\x{6f4}\x{1680}
371
372/\s/WBZ8
373    >\x{1680}>
374
375/\W/WBZ8
376    A\x{1712}B
377
378/\w/WBZ8
379    >\x{1723}<
380
381/[[:alpha:]]/WBZ
382
383/[[:lower:]]/WBZ
384
385/[[:upper:]]/WBZ
386
387/[[:alnum:]]/WBZ
388
389/[[:ascii:]]/WBZ
390
391/[[:cntrl:]]/WBZ
392
393/[[:digit:]]/WBZ
394
395/[[:graph:]]/WBZ
396
397/[[:print:]]/WBZ
398
399/[[:punct:]]/WBZ
400
401/[[:space:]]/WBZ
402
403/[[:word:]]/WBZ
404
405/[[:xdigit:]]/WBZ
406
407/-- Unicode properties for \b and \B --/
408
409/\b...\B/8W
410    abc_
411    \x{37e}abc\x{376}
412    \x{37e}\x{376}\x{371}\x{393}\x{394}
413    !\x{c0}++\x{c1}\x{c2}
414    !\x{c0}+++++
415
416/-- Without PCRE_UCP, non-ASCII always fail, even if < 256  --/
417
418/\b...\B/8
419    abc_
420    ** Failers
421    \x{37e}abc\x{376}
422    \x{37e}\x{376}\x{371}\x{393}\x{394}
423    !\x{c0}++\x{c1}\x{c2}
424    !\x{c0}+++++
425
426/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties  --/
427
428/\b...\B/W
429    abc_
430    !\x{c0}++\x{c1}\x{c2}
431    !\x{c0}+++++
432
433/-- Some of these are silly, but they check various combinations --/
434
435/[[:^alpha:][:^cntrl:]]+/8WBZ
436    123
437    abc
438
439/[[:^cntrl:][:^alpha:]]+/8WBZ
440    123
441    abc
442
443/[[:alpha:]]+/8WBZ
444    abc
445
446/[[:^alpha:]\S]+/8WBZ
447    123
448    abc
449
450/[^\d]+/8WBZ
451    abc123
452    abc\x{123}
453    \x{660}abc
454
455/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ
456
457/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ
458
459/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ
460
461/\p{Han}+X\p{Greek}+\x{370}/BZ8
462
463/\p{Xan}+!\p{Xan}+A/BZ
464
465/\p{Xsp}+!\p{Xsp}\t/BZ
466
467/\p{Xps}+!\p{Xps}\t/BZ
468
469/\p{Xwd}+!\p{Xwd}_/BZ
470
471/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ
472
473/-- These behaved oddly in Perl, so they are kept in this test --/
474
475/(\x{23a}\x{23a}\x{23a})?\1/8i
476    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
477
478/(ȺȺȺ)?\1/8i
479    ȺȺȺⱥⱥ
480
481/(\x{23a}\x{23a}\x{23a})?\1/8i
482    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
483
484/(ȺȺȺ)?\1/8i
485    ȺȺȺⱥⱥⱥ
486
487/(\x{23a}\x{23a}\x{23a})\1/8i
488    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
489
490/(ȺȺȺ)\1/8i
491    ȺȺȺⱥⱥ
492
493/(\x{23a}\x{23a}\x{23a})\1/8i
494    \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
495
496/(ȺȺȺ)\1/8i
497    ȺȺȺⱥⱥⱥ
498
499/(\x{2c65}\x{2c65})\1/8i
500    \x{2c65}\x{2c65}\x{23a}\x{23a}
501
502/(ⱥⱥ)\1/8i
503    ⱥⱥȺȺ
504
505/(\x{23a}\x{23a}\x{23a})\1Y/8i
506    X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
507
508/(\x{2c65}\x{2c65})\1Y/8i
509    X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
510
511/-- --/
512
513/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/
514
515/^[\p{Batak}]/8
516    \x{1bc0}
517    \x{1bff}
518    ** Failers
519    \x{1bf4}
520
521/^[\p{Brahmi}]/8
522    \x{11000}
523    \x{1106f}
524    ** Failers
525    \x{1104e}
526
527/^[\p{Mandaic}]/8
528    \x{840}
529    \x{85e}
530    ** Failers
531    \x{85c}
532    \x{85d}
533
534/-- --/
535
536/(\X*)(.)/s8
537    A\x{300}
538
539/^S(\X*)e(\X*)$/8
540    Stéréo
541
542/^\X/8
543    ́réo
544
545/^a\X41z/<JS>
546    aX41z
547    *** Failers
548    aAz
549
550/(?<=ab\Cde)X/8
551
552/\X/
553    a\P
554    a\P\P
555
556/\Xa/
557    aa\P
558    aa\P\P
559
560/\X{2}/
561    aa\P
562    aa\P\P
563
564/\X+a/
565    a\P
566    aa\P
567    aa\P\P
568
569/\X+?a/
570    a\P
571    ab\P
572    aa\P
573    aa\P\P
574    aba\P
575
576/-- These Unicode 6.1.0 scripts are not known to Perl. --/
577
578/\p{Chakma}\d/8W
579    \x{11100}\x{1113c}
580
581/\p{Takri}\d/8W
582    \x{11680}\x{116c0}
583
584/^\X/8
585    A\P
586    A\P\P
587    A\x{300}\x{301}\P
588    A\x{300}\x{301}\P\P
589    A\x{301}\P
590    A\x{301}\P\P
591
592/^\X{2,3}/8
593    A\P
594    A\P\P
595    AA\P
596    AA\P\P
597    A\x{300}\x{301}\P
598    A\x{300}\x{301}\P\P
599    A\x{300}\x{301}A\x{300}\x{301}\P
600    A\x{300}\x{301}A\x{300}\x{301}\P\P
601
602/^\X{2}/8
603    AA\P
604    AA\P\P
605    A\x{300}\x{301}A\x{300}\x{301}\P
606    A\x{300}\x{301}A\x{300}\x{301}\P\P
607
608/^\X+/8
609    AA\P
610    AA\P\P
611
612/^\X+?Z/8
613    AA\P
614    AA\P\P
615
616/A\x{3a3}B/8iDZ
617
618/\x{3a3}B/8iDZ
619
620/[\x{3a3}]/8iBZ
621
622/[^\x{3a3}]/8iBZ
623
624/[\x{3a3}]+/8iBZ
625
626/[^\x{3a3}]+/8iBZ
627
628/a*\x{3a3}/8iBZ
629
630/\x{3a3}+a/8iBZ
631
632/\x{3a3}*\x{3c2}/8iBZ
633
634/\x{3a3}{3}/8i+
635    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
636
637/\x{3a3}{2,4}/8i+
638    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
639
640/\x{3a3}{2,4}?/8i+
641    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
642
643/\x{3a3}+./8i+
644    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
645
646/\x{3a3}++./8i+
647    ** Failers
648    \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
649
650/\x{3a3}*\x{3c2}/8iBZ
651
652/[^\x{3a3}]*\x{3c2}/8iBZ
653
654/[^a]*\x{3c2}/8iBZ
655
656/ist/8iBZ
657    ikt
658
659/is+t/8i
660    iSs\x{17f}t
661    ikt
662
663/is+?t/8i
664    ikt
665
666/is?t/8i
667    ikt
668
669/is{2}t/8i
670    iskt
671
672/-- End of testinput7 --/
673