1--TEST--
2Testing mb_convert_kana() function
3--EXTENSIONS--
4mbstring
5--FILE--
6<?php
// Katakana fixture rows; each row is echoed and converted on its own line below.
7$zenKakuA    =	'ァアィイゥウェエォオカガキギク';
8$zenKakuB    =	'グケゲコゴサザシジスズセゼソゾタ';
9$zenKakuC    =	'ダチヂッツヅテデトドナニヌネノハ';
10$zenKakuD    =	'バパヒビピフブプヘベペホボポマミ';
11$zenKakuE    =	'ムメモャヤュユョヨラリルレロヮワ';
12$zenKakuF    =	'ヰヱヲンヴヵヶヷヸヹヺ・ーヽヾ';
13
// NOTE(review): going by their names, these rows are presumably meant to hold
// hankaku (half-width) forms. Confirm the literals have not been normalised to
// full-width by an editor or encoding conversion; the 'AK' checks below use them.
14$hanKakuA    =	'⦆。「」、・ヲァィゥェォャュョッ';
15$hanKakuB    =	'ーアイウエオカキクケコサシスセソ';
16$hanKakuC    =	'タチツテトナニヌネノハヒフヘホマ';
17$hanKakuD    =	'ミムメモヤユヨラリルレロワン゙゚';
18
19// 'AK' converts Hankaku (half-width) to Zenkaku (full-width); the input is already Zenkaku, so no effect
20echo "'A': " . $zenKakuA . ' => ' . mb_convert_kana($zenKakuA, 'AK', 'utf-8') . "\n";
21// 'ak' converts Zenkaku (full-width) to Hankaku (half-width); has an effect
22echo "'a': " . $zenKakuB . ' => ' . mb_convert_kana($zenKakuB, 'ak', 'utf-8') . "\n";
23echo "'a': " . $zenKakuC . ' => ' . mb_convert_kana($zenKakuC, 'ak', 'utf-8') . "\n";
24echo "'a': " . $zenKakuD . ' => ' . mb_convert_kana($zenKakuD, 'ak', 'utf-8') . "\n";
25echo "'a': " . $zenKakuE . ' => ' . mb_convert_kana($zenKakuE, 'ak', 'utf-8') . "\n";
26echo "'a': " . $zenKakuF . ' => ' . mb_convert_kana($zenKakuF, 'ak', 'utf-8') . "\n";
27echo "\n";
28// 'AK' converts Hankaku (half-width) to Zenkaku (full-width); has an effect
29echo "'A': " . $hanKakuA . ' => ' . mb_convert_kana($hanKakuA, 'AK', 'utf-8') . "\n";
30echo "'A': " . $hanKakuB . ' => ' . mb_convert_kana($hanKakuB, 'AK', 'utf-8') . "\n";
31echo "'A': " . $hanKakuC . ' => ' . mb_convert_kana($hanKakuC, 'AK', 'utf-8') . "\n";
32echo "'A': " . $hanKakuD . ' => ' . mb_convert_kana($hanKakuD, 'AK', 'utf-8') . "\n\n";
33
// UTF-16BE inputs are written as raw byte escapes and the result is dumped with
// bin2hex(), so each output line shows the converted code units in hex.
// The same letter is run through both 'A' and 'R'; --EXPECT-- lists ff21 for each.
34echo "Convert ASCII letter to full-width: A -> ", bin2hex(mb_convert_kana("\x00A", 'A', 'UTF-16BE')), "\n";
35echo "Convert ASCII letter to full-width: A -> ", bin2hex(mb_convert_kana("\x00A", 'R', 'UTF-16BE')), "\n";
36echo "Convert ASCII numeral to full-width: 1 -> ", bin2hex(mb_convert_kana("\x001", 'N', 'UTF-16BE')), "\n";
37echo "Convert ASCII space to full-width: ", bin2hex(mb_convert_kana("\x00 ", 'S', 'UTF-16BE')), "\n\n";
38
39echo "Convert hankaku kana to zenkaku kana:\n";
40echo "Using 'glue' mode:\n";
// With 'V' added, a two-codepoint input ending in FF9E or FF9F is expected to
// collapse into a single output codepoint (see the one-unit values in --EXPECT--).
41echo bin2hex(mb_convert_kana("\xFF\x76\xFF\x9E", 'KV', 'UTF-16BE')), "\n";
42echo bin2hex(mb_convert_kana("\xFF\x73\xFF\x9E", 'KV', 'UTF-16BE')), "\n";
43echo bin2hex(mb_convert_kana("\xFF\x8A\xFF\x9F", 'KV', 'UTF-16BE')), "\n";
// Kana followed by 'A' (0041): nothing to combine, so two codepoints come out.
44echo bin2hex(mb_convert_kana("\xFF\x8A\x00A", 'KV', 'UTF-16BE')), "\n";
// Same inputs with 'H' instead of 'K'; --EXPECT-- lists a different target
// codepoint for each (e.g. 304c rather than 30ac).
45echo bin2hex(mb_convert_kana("\xFF\x76\xFF\x9E", 'HV', 'UTF-16BE')), "\n";
46echo bin2hex(mb_convert_kana("\xFF\x8A\xFF\x9F", 'HV', 'UTF-16BE')), "\n";
47echo bin2hex(mb_convert_kana("\xFF\x8A\x00A", 'HV', 'UTF-16BE')), "\n";
48echo "Not using 'glue' mode:\n";
// FF70 is expected to map to 30fc under both 'K' and 'H'.
49echo bin2hex(mb_convert_kana("\xFF\x70", 'K', 'UTF-16BE')), "\n";
50echo bin2hex(mb_convert_kana("\xFF\x70", 'H', 'UTF-16BE')), "\n\n";
51
52echo "Convert selected punctuation/symbols to full-width and back:\n";
// 'M' widens: per --EXPECT--, 005C and 00A5 both become ffe5, and 007E and 203E
// both become ffe3, so the mapping is not one-to-one.
53echo bin2hex(mb_convert_kana("\x00\x5C\x00\xA5\x00\x7E\x20\x3E\x00\x27\x00\x22", 'M', 'UTF-16BE')), "\n";
// 'm' narrows: each input pair is expected to fold onto the same ASCII character.
54echo bin2hex(mb_convert_kana("\xFF\xE5\xFF\x3C\xFF\xE3\x20\x3E\x20\x18\x20\x19\x20\x1C\x20\x1D", 'm', 'UTF-16BE')), "\n\n";
55
// Lowercase flags 'a', 'r', 'n', 's' are each fed one full-width UTF-16BE
// codepoint; --EXPECT-- lists the matching ASCII codepoint for every line.
56echo "Convert various full-width characters to half-width:\n";
57echo bin2hex(mb_convert_kana("\xFF\x01", 'a', 'UTF-16BE')), "\n";
58echo bin2hex(mb_convert_kana("\xFF\x21", 'r', 'UTF-16BE')), "\n";
59echo bin2hex(mb_convert_kana("\xFF\x10", 'n', 'UTF-16BE')), "\n";
60echo bin2hex(mb_convert_kana("\x30\x00", 's', 'UTF-16BE')), "\n";
// 2212 is outside the FFxx range used above, yet 'a' is expected to turn it into 002d.
61echo bin2hex(mb_convert_kana("\x22\x12", 'a', 'UTF-16BE')), "\n\n";
62
63echo "Convert full-width kana to half-width:\n";
// 'h' flag: one kana codepoint followed by eight punctuation/mark codepoints.
64echo bin2hex(mb_convert_kana("\x30\x41", 'h', 'UTF-16BE')), "\n";
65echo bin2hex(mb_convert_kana("\x30\x01", 'h', 'UTF-16BE')), "\n";
66echo bin2hex(mb_convert_kana("\x30\x02", 'h', 'UTF-16BE')), "\n";
67echo bin2hex(mb_convert_kana("\x30\x0C", 'h', 'UTF-16BE')), "\n";
68echo bin2hex(mb_convert_kana("\x30\x0D", 'h', 'UTF-16BE')), "\n";
69echo bin2hex(mb_convert_kana("\x30\x9B", 'h', 'UTF-16BE')), "\n";
70echo bin2hex(mb_convert_kana("\x30\x9C", 'h', 'UTF-16BE')), "\n";
71echo bin2hex(mb_convert_kana("\x30\xFC", 'h', 'UTF-16BE')), "\n";
72echo bin2hex(mb_convert_kana("\x30\xFB", 'h', 'UTF-16BE')), "\n";
// 'k' flag: the same eight punctuation/mark codepoints; --EXPECT-- lists the
// same results as for 'h'.
73echo bin2hex(mb_convert_kana("\x30\x01", 'k', 'UTF-16BE')), "\n";
74echo bin2hex(mb_convert_kana("\x30\x02", 'k', 'UTF-16BE')), "\n";
75echo bin2hex(mb_convert_kana("\x30\x0C", 'k', 'UTF-16BE')), "\n";
76echo bin2hex(mb_convert_kana("\x30\x0D", 'k', 'UTF-16BE')), "\n";
77echo bin2hex(mb_convert_kana("\x30\x9B", 'k', 'UTF-16BE')), "\n";
78echo bin2hex(mb_convert_kana("\x30\x9C", 'k', 'UTF-16BE')), "\n";
79echo bin2hex(mb_convert_kana("\x30\xFC", 'k', 'UTF-16BE')), "\n";
80echo bin2hex(mb_convert_kana("\x30\xFB", 'k', 'UTF-16BE')), "\n";
// Mixed ASCII + kana UTF-8 input with several lowercase flags combined in one call.
81echo bin2hex(mb_convert_kana("fooあいうエオ", "rnaskh", "UTF-8")), "\n";
82echo "Including one which will expand to two codepoints:\n";
// Single input codepoint 3052; --EXPECT-- lists two output codepoints (ff79 ff9e).
83echo bin2hex(mb_convert_kana("\x30\x52", 'h', 'UTF-16BE')), "\n\n";
84
// 'C' and 'c' are exercised as inverses: the 'c' inputs below (30A1, 30FD, 30FE)
// are exactly the outputs --EXPECT-- lists for the 'C' calls, and vice versa.
85echo "Convert full-width hiragana to full-width katakana:\n";
86echo bin2hex(mb_convert_kana("\x30\x41", 'C', 'UTF-16BE')), "\n";
87echo bin2hex(mb_convert_kana("\x30\x9D", 'C', 'UTF-16BE')), "\n";
88echo bin2hex(mb_convert_kana("\x30\x9E", 'C', 'UTF-16BE')), "\n\n";
89
90echo "Convert full-width katakana to full-width hiragana:\n";
91echo bin2hex(mb_convert_kana("\x30\xA1", 'c', 'UTF-16BE')), "\n";
92echo bin2hex(mb_convert_kana("\x30\xFD", 'c', 'UTF-16BE')), "\n";
93echo bin2hex(mb_convert_kana("\x30\xFE", 'c', 'UTF-16BE')), "\n\n";
94
// A lone 0000 codepoint under 'A' is expected to come back unchanged (0000).
95echo bin2hex(mb_convert_kana("\x00\x00", 'A', 'UTF-16BE')), "\n";
96echo "\n";
97
98// Try combinations of flags which don't make sense
/**
 * Runs mb_convert_kana() on a fixed sample with the given mode string and
 * reports what happened.
 *
 * When the call raises a ValueError its message is printed; when the call
 * goes through without one, a "BAD!" line naming the flags is printed instead.
 *
 * @param string $flags Mode characters handed to mb_convert_kana() as argument #2.
 */
function tryBadFlags($flags) {
  $rejected = false;

  try {
    mb_convert_kana('abc', $flags, 'UTF-8');
  } catch (ValueError $error) {
    $rejected = true;
    echo $error->getMessage(), "\n";
  }

  if (!$rejected) {
    echo "BAD! mb_convert_kana should have thrown an exception for flags: $flags\n";
  }
}
/**
 * Checks a pair of mode flags in both orders by passing each concatenation
 * to tryBadFlags().
 *
 * @param string $flag1 First mode character.
 * @param string $flag2 Second mode character.
 */
function tryIncompatibleFlags($flag1, $flag2) {
  $orderings = [$flag1 . $flag2, $flag2 . $flag1];

  foreach ($orderings as $combined) {
    tryBadFlags($combined);
  }
}
111
// Flag pairs that mb_convert_kana() must refuse to combine; every pair is
// checked in both orders, in the sequence listed here.
$incompatiblePairs = [
  ['A', 'a'],
  ['A', 'r'],
  ['A', 'n'],
  ['a', 'R'],
  ['a', 'N'],
  ['R', 'r'],
  ['N', 'n'],
  ['S', 's'],
  ['K', 'H'],
  ['C', 'c'],
  ['M', 'm'],
  ['h', 'C'],
  ['h', 'c'],
  ['k', 'C'],
  ['k', 'c'],
];
foreach ($incompatiblePairs as [$first, $second]) {
  tryIncompatibleFlags($first, $second);
}
127
// A mode character that does not exist: the ValueError message is printed
try {
  mb_convert_kana($zenKakuA, 'Z', 'UTF-8');
} catch (ValueError $unknownFlagError) {
  echo $unknownFlagError->getMessage(), "\n";
}
134
// Regression check: two codepoints get merged into one, a single position
// ahead of the end of the input string
$expected  = "\x1b\$B\$2!+\x1b(B";
$converted = mb_convert_kana("\xb9\xde\xde", 'HV', 'JIS');
if ($converted !== $expected) {
  echo "Failed! Expected " . bin2hex($expected) . ", got: " . bin2hex($converted) . "\n";
}
140
// Regression check: an earlier mb_convert_kana implementation could drop
// zero bytes from the input in some cases
$withNul = "abc\x00abc";
if (mb_convert_kana($withNul, 'c', 'ASCII') !== $withNul) {
  echo "mb_convert_kana is swallowing zero bytes!\n";
}
145
146?>
147--EXPECT--
148'A': ァアィイゥウェエォオカガキギク => ァアィイゥウェエォオカガキギク
149'a': グケゲコゴサザシジスズセゼソゾタ => グケゲコゴサザシジスズセゼソゾタ
150'a': ダチヂッツヅテデトドナニヌネノハ => ダチヂッツヅテデトドナニヌネノハ
151'a': バパヒビピフブプヘベペホボポマミ => バパヒビピフブプヘベペホボポマミ
152'a': ムメモャヤュユョヨラリルレロヮワ => ムメモャヤュユョヨラリルレロワワ
153'a': ヰヱヲンヴヵヶヷヸヹヺ・ーヽヾ => イエヲンヴヵヶヷヸヹヺ・ーヽヾ
154
155'A': ⦆。「」、・ヲァィゥェォャュョッ => ⦆。「」、・ヲァィゥェォャュョッ
156'A': ーアイウエオカキクケコサシスセソ => ーアイウエオカキクケコサシスセソ
157'A': タチツテトナニヌネノハヒフヘホマ => タチツテトナニヌネノハヒフヘホマ
158'A': ミムメモヤユヨラリルレロワン゙゚ => ミムメモヤユヨラリルレロワン゛゜
159
160Convert ASCII letter to full-width: A -> ff21
161Convert ASCII letter to full-width: A -> ff21
162Convert ASCII numeral to full-width: 1 -> ff11
163Convert ASCII space to full-width: 3000
164
165Convert hankaku kana to zenkaku kana:
166Using 'glue' mode:
16730ac
16830f4
16930d1
17030cf0041
171304c
1723071
173306f0041
174Not using 'glue' mode:
17530fc
17630fc
177
178Convert selected punctuation/symbols to full-width and back:
179ffe5ffe5ffe3ffe32019201d
180005c005c007e007e0027002700220022
181
182Convert various full-width characters to half-width:
1830021
1840041
1850030
1860020
187002d
188
189Convert full-width kana to half-width:
190ff67
191ff64
192ff61
193ff62
194ff63
195ff9e
196ff9f
197ff70
198ff65
199ff64
200ff61
201ff62
202ff63
203ff9e
204ff9f
205ff70
206ff65
207666f6fefbdb1efbdb2efbdb3efbdb4efbdb5
208Including one which will expand to two codepoints:
209ff79ff9e
210
211Convert full-width hiragana to full-width katakana:
21230a1
21330fd
21430fe
215
216Convert full-width katakana to full-width hiragana:
2173041
218309d
219309e
220
2210000
222
223mb_convert_kana(): Argument #2 ($mode) must not combine 'A' and 'a' flags
224mb_convert_kana(): Argument #2 ($mode) must not combine 'A' and 'a' flags
225mb_convert_kana(): Argument #2 ($mode) must not combine 'A' and 'r' flags
226mb_convert_kana(): Argument #2 ($mode) must not combine 'A' and 'r' flags
227mb_convert_kana(): Argument #2 ($mode) must not combine 'A' and 'n' flags
228mb_convert_kana(): Argument #2 ($mode) must not combine 'A' and 'n' flags
229mb_convert_kana(): Argument #2 ($mode) must not combine 'R' and 'a' flags
230mb_convert_kana(): Argument #2 ($mode) must not combine 'R' and 'a' flags
231mb_convert_kana(): Argument #2 ($mode) must not combine 'N' and 'a' flags
232mb_convert_kana(): Argument #2 ($mode) must not combine 'N' and 'a' flags
233mb_convert_kana(): Argument #2 ($mode) must not combine 'R' and 'r' flags
234mb_convert_kana(): Argument #2 ($mode) must not combine 'R' and 'r' flags
235mb_convert_kana(): Argument #2 ($mode) must not combine 'N' and 'n' flags
236mb_convert_kana(): Argument #2 ($mode) must not combine 'N' and 'n' flags
237mb_convert_kana(): Argument #2 ($mode) must not combine 'S' and 's' flags
238mb_convert_kana(): Argument #2 ($mode) must not combine 'S' and 's' flags
239mb_convert_kana(): Argument #2 ($mode) must not combine 'H' and 'K' flags
240mb_convert_kana(): Argument #2 ($mode) must not combine 'H' and 'K' flags
241mb_convert_kana(): Argument #2 ($mode) must not combine 'C' and 'c' flags
242mb_convert_kana(): Argument #2 ($mode) must not combine 'C' and 'c' flags
243mb_convert_kana(): Argument #2 ($mode) must not combine 'M' and 'm' flags
244mb_convert_kana(): Argument #2 ($mode) must not combine 'M' and 'm' flags
245mb_convert_kana(): Argument #2 ($mode) must not combine 'h' and 'C' flags
246mb_convert_kana(): Argument #2 ($mode) must not combine 'h' and 'C' flags
247mb_convert_kana(): Argument #2 ($mode) must not combine 'h' and 'c' flags
248mb_convert_kana(): Argument #2 ($mode) must not combine 'h' and 'c' flags
249mb_convert_kana(): Argument #2 ($mode) must not combine 'k' and 'C' flags
250mb_convert_kana(): Argument #2 ($mode) must not combine 'k' and 'C' flags
251mb_convert_kana(): Argument #2 ($mode) must not combine 'k' and 'c' flags
252mb_convert_kana(): Argument #2 ($mode) must not combine 'k' and 'c' flags
253mb_convert_kana(): Argument #2 ($mode) contains invalid flag: 'Z'
254--CREDITS--
255Jason Easter <easter@phpug-wuerzburg.de>
256PHPUG Würzburg <phpug-wuerzburg.de>
257Testfest 2009 2009-06-20
258