--TEST--
mb_strlen()
--EXTENSIONS--
mbstring
--FILE--
<?php
// Compares mb_strlen() (character count) with strlen() (byte count) for a set
// of encodings, and checks how stray/illegal bytes are counted in the
// Shift-JIS family of encodings.
// TODO: Add more encodings

ini_set('include_path', __DIR__);
include_once('common.inc');

// restore detect_order to 'auto'
mb_detect_order('auto');

// Test strings. The Japanese sample is written here as UTF-8 and converted to
// EUC-JP once; the SJIS, JIS and UTF-8 variants below are derived from it.
$euc_jp = mb_convert_encoding("0123この文字列は日本語です。EUC-JPを使っています。0123日本語は面倒臭い。", 'EUC-JP', 'UTF-8');
$ascii = 'abcdefghijklmnopqrstuvwxyz;]=#0123456789';

echo "== ASCII ==\n";
print mb_strlen($ascii, 'ASCII') . "\n";
print strlen($ascii) . "\n";

echo "== EUC-JP ==\n";
print mb_strlen($euc_jp, 'EUC-JP') . "\n";
// The `or print` fallback only produces output if switching the internal
// encoding fails, which would then show up as a mismatch against --EXPECT--.
mb_internal_encoding('EUC-JP') or print("mb_internal_encoding() failed\n");
print strlen($euc_jp) . "\n";

echo "== SJIS ==\n";
$sjis = mb_convert_encoding($euc_jp, 'SJIS', 'EUC-JP');
print mb_strlen($sjis, 'SJIS') . "\n";
mb_internal_encoding('SJIS') or print("mb_internal_encoding() failed\n");
print strlen($sjis) . "\n";
print "-- Testing illegal bytes 0x80,0xFD-FF --\n";
// mb_strlen used to wrongly treat 0x80 as the starting byte of a 2-byte SJIS character
print mb_strlen("\x80\xA1", 'SJIS') . "\n";
print mb_strlen("abc\xFD\xFE\xFF", 'SJIS') . "\n";

// The same two probes are repeated for each Shift-JIS variant below: a
// two-byte string starting with 0x80, and a six-byte string ending in
// 0xFD 0xFE 0xFF. A six-byte input can never yield a count above 6.
echo "== CP932 ==\n";
print mb_strlen("\x80\xA1", "CP932") . "\n";
// 0xFD, 0xFE, 0xFF is reserved.
print mb_strlen("abc\xFD\xFE\xFF", 'CP932') . "\n";
print mb_strlen("\x80\xA1", "SJIS-win") . "\n";
print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-win') . "\n";

echo "== MacJapanese ==\n";
print mb_strlen("\x80\xA1", 'MacJapanese') . "\n";
print mb_strlen("abc\xFD\xFE\xFF", 'MacJapanese') . "\n";

echo "== SJIS-2004 ==\n";
print mb_strlen("\x80\xA1", 'SJIS-2004') . "\n";
print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-2004') . "\n";

echo "== SJIS-Mobile#DOCOMO ==\n";
print mb_strlen("\x80\xA1", 'SJIS-Mobile#DOCOMO') . "\n";
print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-Mobile#DOCOMO') . "\n";

echo "== SJIS-Mobile#KDDI ==\n";
print mb_strlen("\x80\xA1", 'SJIS-Mobile#KDDI') . "\n";
print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-Mobile#KDDI') . "\n";

echo "== SJIS-Mobile#SoftBank ==\n";
print mb_strlen("\x80\xA1", 'SJIS-Mobile#SoftBank') . "\n";
print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-Mobile#SoftBank') . "\n";

echo "== JIS ==\n";
$jis = mb_convert_encoding($euc_jp, 'JIS', 'EUC-JP');
print mb_strlen($jis, 'JIS') . "\n";
mb_internal_encoding('JIS') or print("mb_internal_encoding() failed\n");
print strlen($jis) . "\n";

echo "== UTF-8 ==\n";
$utf8 = mb_convert_encoding($euc_jp, 'UTF-8', 'EUC-JP');
print mb_strlen($utf8, 'UTF-8') . " codepoints\n";
mb_internal_encoding('UTF-8') or print("mb_internal_encoding() failed\n");
print strlen($utf8) . " bytes\n";

// Mixed-script sample, plus a 100x repetition of it to cover a long input.
$utf8 = "abcde あいうえお 汉字 ελληνικά";
$long_utf8 = str_repeat($utf8, 100);
print mb_strlen($utf8, 'UTF-8') . "\n";
print mb_strlen($long_utf8, 'UTF-8') . "\n";

echo "== UTF-8 with performance optimizations ==\n";
// Optimized mb_strlen can be used on UTF-8 strings after they are checked for validity
// No encoding argument is passed here, so these two checks rely on the
// internal encoding having been switched to UTF-8 just above.
mb_check_encoding($utf8);
mb_check_encoding($long_utf8);
print mb_strlen($utf8, 'UTF-8') . "\n";
print mb_strlen($long_utf8, 'UTF-8') . "\n";

$str = str_repeat('Σ', 2048); // 2-byte UTF-8 character
mb_check_encoding($str, 'UTF-8');
print mb_strlen($str, 'UTF-8') . "\n";

// Wrong Parameters
echo "== WRONG PARAMETERS ==\n";
// Wrong encoding: an unknown encoding name is expected to raise a ValueError,
// whose message is printed and compared against --EXPECT--.
mb_internal_encoding('EUC-JP');
try {
    var_dump( mb_strlen($euc_jp, 'BAD_NAME') );
} catch (\ValueError $e) {
    echo $e->getMessage() . \PHP_EOL;
}

?>
--EXPECT--
== ASCII ==
40
40
== EUC-JP ==
43
72
== SJIS ==
43
72
-- Testing illegal bytes 0x80,0xFD-FF --
2
6
== CP932 ==
2
6
2
6
== MacJapanese ==
2
6
== SJIS-2004 ==
2
6
== SJIS-Mobile#DOCOMO ==
2
6
== SJIS-Mobile#KDDI ==
2
6
== SJIS-Mobile#SoftBank ==
2
6
== JIS ==
43
90
== UTF-8 ==
43 codepoints
101 bytes
23
2300
== UTF-8 with performance optimizations ==
23
2300
2048
== WRONG PARAMETERS ==
mb_strlen(): Argument #2 ($encoding) must be a valid encoding, "BAD_NAME" given