xref: /php-src/ext/mbstring/tests/casemapping.phpt (revision a9035863)
1--TEST--
2Unicode case mapping
3--EXTENSIONS--
4mbstring
5--FILE--
6<?php
7
/**
 * Echo $str, then one "<label>: <converted>" line for every case-conversion
 * mode (each full mode followed by its *_SIMPLE counterpart), and finish
 * with an empty line so consecutive reports are visually separated.
 *
 * Conversions use the current mbstring internal encoding, because no
 * encoding argument is passed to mb_convert_case().
 */
function toCases($str) {
    // Label => mode, listed in the exact order the report is printed.
    $modes = [
        'Lower'        => MB_CASE_LOWER,
        'Lower Simple' => MB_CASE_LOWER_SIMPLE,
        'Upper'        => MB_CASE_UPPER,
        'Upper Simple' => MB_CASE_UPPER_SIMPLE,
        'Title'        => MB_CASE_TITLE,
        'Title Simple' => MB_CASE_TITLE_SIMPLE,
        'Fold'         => MB_CASE_FOLD,
        'Fold Simple'  => MB_CASE_FOLD_SIMPLE,
    ];

    echo "String: $str\n";

    foreach ($modes as $label => $mode) {
        echo $label, ": ", mb_convert_case($str, $mode), "\n";
    }

    echo "\n";
}
20
// Inputs whose full and simple case mappings differ.
toCases("ß");
// U+FB00 LATIN SMALL LIGATURE FF. Written as an escape so it cannot be
// mistaken for (or silently normalised into) the two ASCII letters "ff",
// for which full and simple mappings would be identical.
toCases("\u{FB00}");
toCases("İ");

// Make sure that case-conversion in Turkish still works correctly.
// Using the language-agnostic Unicode case mappings would result in
// characters that are illegal under ISO-8859-9.
// From here on the internal encoding is ISO-8859-9, so each input below is a
// single byte and the results are printed as hex.
mb_internal_encoding('ISO-8859-9');

// Capital I with dot (U+0130)
$str = "\xdd";
echo bin2hex(mb_convert_case($str, MB_CASE_LOWER)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_LOWER_SIMPLE)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD_SIMPLE)), "\n";
echo "\n";

// Lower i without dot (U+0131)
$str = "\xfd";
echo bin2hex(mb_convert_case($str, MB_CASE_UPPER)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_UPPER_SIMPLE)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD_SIMPLE)), "\n";
echo "\n";

// Capital I without dot (U+0049)
$str = "\x49";
echo bin2hex(mb_convert_case($str, MB_CASE_LOWER)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_LOWER_SIMPLE)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD_SIMPLE)), "\n";
echo "\n";

// Lower i with dot (U+0069)
// Unlike the groups above, no blank separator line is echoed after this one.
$str = "\x69";
echo bin2hex(mb_convert_case($str, MB_CASE_UPPER)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_UPPER_SIMPLE)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD)), "\n";
echo bin2hex(mb_convert_case($str, MB_CASE_FOLD_SIMPLE)), "\n";

// Check handling of Greek letter capital sigma
// The encoding is passed explicitly because the internal encoding is still
// ISO-8859-9 at this point.
echo mb_convert_case("ΚΑΛΗΣΠΕΡΑ ΣΑΣ", MB_CASE_TITLE, "UTF-8"), "\n";
echo mb_convert_case("ΚΑΛΗΣΠΕΡΑ ΣΑΣ", MB_CASE_TITLE_SIMPLE, "UTF-8"), "\n";
echo mb_convert_case("ΚΑΛΗΣΠΕΡΑ ΣΑΣ", MB_CASE_LOWER, "UTF-8"), "\n";
echo mb_convert_case("ΚΑΛΗΣΠΕΡΑ ΣΑΣ", MB_CASE_LOWER_SIMPLE, "UTF-8"), "\n";
66
67?>
68--EXPECT--
69String: ß
70Lower: ß
71Lower Simple: ß
72Upper: SS
73Upper Simple: ß
74Title: Ss
75Title Simple: ß
76Fold: ss
77Fold Simple: ß
78
String: ﬀ
Lower: ﬀ
Lower Simple: ﬀ
Upper: FF
Upper Simple: ﬀ
Title: Ff
Title Simple: ﬀ
Fold: ff
Fold Simple: ﬀ
88
89String: İ
90Lower: i̇
91Lower Simple: i
92Upper: İ
93Upper Simple: İ
94Title: İ
95Title Simple: İ
96Fold: i̇
97Fold Simple: İ
98
9969
10069
10169
10269
103
10449
10549
106fd
107fd
108
109fd
110fd
111fd
112fd
113
114dd
115dd
11669
11769
118Καλησπερα Σας
119Καλησπερα Σασ
120καλησπερα σας
121καλησπερα σασ
122