xref: /php-src/ext/mbstring/tests/mb_strlen.phpt (revision c50172e8)
1--TEST--
2mb_strlen()
3--EXTENSIONS--
4mbstring
5--FILE--
6<?php
7// TODO: Add more encodings
8// Prints mb_strlen() character counts next to strlen() byte counts for the same sample text in several encodings.
9ini_set('include_path', __DIR__); // lets the include below be found in this test's own directory
10include_once('common.inc');
11
12// restore detect_order to 'auto'
13mb_detect_order('auto');
14
15// Test strings: $euc_jp is the UTF-8 literal below converted to EUC-JP; $ascii is single-byte text only
16$euc_jp = mb_convert_encoding("0123この文字列は日本語です。EUC-JPを使っています。0123日本語は面倒臭い。", 'EUC-JP', 'UTF-8');
17$ascii  = 'abcdefghijklmnopqrstuvwxyz;]=#0123456789';
18
19echo "== ASCII ==\n";
20print mb_strlen($ascii,'ASCII') . "\n"; // character count
21print strlen($ascii) . "\n"; // byte count; expected to equal the character count (40 and 40)
22
23echo "== EUC-JP ==\n";
24print mb_strlen($euc_jp,'EUC-JP') . "\n"; // expected: 43 characters
25mb_internal_encoding('EUC-JP') or print("mb_internal_encoding() failed\n"); // message is printed only if the call returns a falsy value
26print strlen($euc_jp) . "\n"; // expected: 72 bytes
27
28echo "== SJIS ==\n";
29$sjis = mb_convert_encoding($euc_jp, 'SJIS','EUC-JP');
30print mb_strlen($sjis,'SJIS') . "\n"; // expected: 43 characters
31mb_internal_encoding('SJIS') or print("mb_internal_encoding() failed\n");
32print strlen($sjis) . "\n"; // expected: 72 bytes
33print "-- Testing illegal bytes 0x80,0xFD-FF --\n";
34// mb_strlen used to wrongly treat 0x80 as the starting byte of a 2-byte SJIS character
35print mb_strlen("\x80\xA1", 'SJIS') . "\n"; // expected: 2 (the two bytes are counted as two characters)
36print mb_strlen("abc\xFD\xFE\xFF", 'SJIS') . "\n"; // expected: 6 ("abc" plus one count per remaining byte)
37
38echo "== CP932 ==\n";
39print mb_strlen("\x80\xA1", "CP932") . "\n"; // expected: 2
40// 0xFD, 0xFE and 0xFF are reserved.
41print mb_strlen("abc\xFD\xFE\xFF", 'CP932') . "\n"; // expected: 6
42print mb_strlen("\x80\xA1", "SJIS-win") . "\n"; // same two inputs again under the name SJIS-win; expected: 2
43print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-win') . "\n"; // expected: 6
44
45echo "== MacJapanese ==\n";
46print mb_strlen("\x80\xA1", 'MacJapanese') . "\n"; // expected: 2
47print mb_strlen("abc\xFD\xFE\xFF", 'MacJapanese') . "\n"; // expected: 7 here, not 6 as for the other encodings given this input
48
49echo "== SJIS-2004 ==\n";
50print mb_strlen("\x80\xA1", 'SJIS-2004') . "\n"; // expected: 2
51print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-2004') . "\n"; // expected: 6
52
53echo "== SJIS-Mobile#DOCOMO ==\n";
54print mb_strlen("\x80\xA1", 'SJIS-Mobile#DOCOMO') . "\n"; // expected: 2
55print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-Mobile#DOCOMO') . "\n"; // expected: 6
56
57echo "== SJIS-Mobile#KDDI ==\n";
58print mb_strlen("\x80\xA1", 'SJIS-Mobile#KDDI') . "\n"; // expected: 2
59print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-Mobile#KDDI') . "\n"; // expected: 6
60
61echo "== SJIS-Mobile#SoftBank ==\n";
62print mb_strlen("\x80\xA1", 'SJIS-Mobile#SoftBank') . "\n"; // expected: 2
63print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-Mobile#SoftBank') . "\n"; // expected: 6
64
65echo "== JIS ==\n";
66$jis = mb_convert_encoding($euc_jp, 'JIS','EUC-JP');
67print mb_strlen($jis,'JIS') . "\n"; // expected: 43 characters
68mb_internal_encoding('JIS')  or print("mb_internal_encoding() failed\n");
69print strlen($jis) . "\n"; // expected: 90 bytes
70
71echo "== UTF-8 ==\n";
72$utf8 = mb_convert_encoding($euc_jp, 'UTF-8', 'EUC-JP');
73print mb_strlen($utf8,'UTF-8') . " codepoints\n"; // expected: 43
74mb_internal_encoding('UTF-8') or print("mb_internal_encoding() failed\n");
75print strlen($utf8) . " bytes\n"; // expected: 101
76// $utf8 is reassigned below to a short mixed-script sample (23 characters); $long_utf8 is that sample repeated 100 times
77$utf8 = "abcde あいうえお 汉字 ελληνικά";
78$long_utf8 = str_repeat($utf8, 100);
79print mb_strlen($utf8, 'UTF-8') . "\n"; // expected: 23
80print mb_strlen($long_utf8, 'UTF-8') . "\n"; // expected: 2300
81
82echo "== UTF-8 with performance optimizations ==\n";
83// Optimized mb_strlen can be used on UTF-8 strings after they are checked for validity
84mb_check_encoding($utf8); // no encoding argument is passed; the internal encoding was set to UTF-8 above
85mb_check_encoding($long_utf8);
86print mb_strlen($utf8, 'UTF-8') . "\n"; // same strings as before the check, so the same counts are expected (23)
87print mb_strlen($long_utf8, 'UTF-8') . "\n"; // expected: 2300
88
89$str = str_repeat('Σ', 2048); // 2-byte UTF-8 character
90mb_check_encoding($str, 'UTF-8');
91print mb_strlen($str, 'UTF-8') . "\n"; // expected: 2048 (one count per repeated character)
92
93// Wrong Parameters
94echo "== WRONG PARAMETERS ==\n";
95// Wrong encoding
96mb_internal_encoding('EUC-JP');
97try {
98    var_dump( mb_strlen($euc_jp, 'BAD_NAME') ); // expected to throw, so var_dump() produces no output
99} catch (\ValueError $e) {
100    echo $e->getMessage() . \PHP_EOL; // the ValueError message is the only output of this section
101}
102
103?>
104--EXPECT--
105== ASCII ==
10640
10740
108== EUC-JP ==
10943
11072
111== SJIS ==
11243
11372
114-- Testing illegal bytes 0x80,0xFD-FF --
1152
1166
117== CP932 ==
1182
1196
1202
1216
122== MacJapanese ==
1232
1247
125== SJIS-2004 ==
1262
1276
128== SJIS-Mobile#DOCOMO ==
1292
1306
131== SJIS-Mobile#KDDI ==
1322
1336
134== SJIS-Mobile#SoftBank ==
1352
1366
137== JIS ==
13843
13990
140== UTF-8 ==
14143 codepoints
142101 bytes
14323
1442300
145== UTF-8 with performance optimizations ==
14623
1472300
1482048
149== WRONG PARAMETERS ==
150mb_strlen(): Argument #2 ($encoding) must be a valid encoding, "BAD_NAME" given
151