--TEST--
mb_str_split() tests for the japanese language
--EXTENSIONS--
mbstring
--INI--
output_handler=
--FILE--
<?php
ini_set('include_path', '.');
include_once('common.inc');

/*
 * Part 1: convert a two-character sample into several encodings and verify
 * that mb_str_split() yields the expected number of chunks for every chunk
 * length, then dump the single-character chunks as hex.
 */
$sample = "日本"; /* 2 chars */
$sampleLength = 2;
$encodings = [
    "BIG-5",
    "EUC-JP",
    "ISO-2022-JP",
    "SJIS",
    "UTF-16BE",
    "UTF-16LE",
    "UTF-32BE",
    "UTF-32LE",
    "UTF-8"
];

foreach ($encodings as $name) {
    $encoded = mb_convert_encoding($sample, $name, "UTF-8");
    $chars = mb_str_split($encoded, 1, $name);

    /* Chunk count must equal ceil(length / chunk length) for every chunk length. */
    $chunkLength = 1;
    while ($chunkLength <= $sampleLength) {
        $expected = ceil($sampleLength / $chunkLength);
        $actual = count(mb_str_split($encoded, $chunkLength, $name));
        /* Loose comparison on purpose: ceil() returns a float, count() an int. */
        if ($expected != $actual) {
            echo "$name WRONG CHUNKS NUMBER: expected/actual: $expected/$actual\n";
        }
        ++$chunkLength;
    }

    /* Print the raw bytes of each one-character chunk as hex. */
    echo "$name:";
    for ($index = 0; $index < $sampleLength; ++$index) {
        echo " " . unpack("H*", $chars[$index])[1];
    }
    echo "\n";
}

/*
 * Part 2: long string test. The sample is repeated 50k times (1e5 chars) and
 * split into chunks of $sampleLength characters, so exactly one chunk per
 * repetition is expected.
 */
$repeatCount = 50000;
$longEncoded = mb_convert_encoding(str_repeat($sample, $repeatCount), "ISO-2022-JP", "UTF-8");
$chunks = mb_str_split($longEncoded, $sampleLength, "ISO-2022-JP");
$chunkCount = count($chunks);

/* The number of chunks must match the number of repetitions. */
if ($repeatCount !== $chunkCount) {
    printf("Long string splitting error: actual array size: %d expected: %d\n", $chunkCount, $repeatCount);
}

/* The last chunk must be identical to the sample converted on its own. */
$expectedLast = mb_convert_encoding($sample, "ISO-2022-JP", "UTF-8");
$actualLast = end($chunks);
if ($actualLast !== $expectedLast) {
    printf("Long string splitting error:
        last array element: %s expected: %s\n", unpack("H*", $actualLast)[1], unpack("H*", $expectedLast)[1]);
}

/*
 * Part 3: SJIS byte 0x80 was previously wrongly treated as the starting byte
 * for a 2-byte character. The second input also tries bytes 0xFD, 0xFE and
 * 0xFF. Per the expected output, each of these bytes counts as one character.
 */
echo "== Regression test for SJIS byte 0x80 ==\n";
$sjisVariants = ['SJIS', 'SJIS-2004', 'MacJapanese', 'SJIS-Mobile#DOCOMO', 'SJIS-Mobile#KDDI', 'SJIS-Mobile#SoftBank'];
$regressionInputs = [
    "\x80\xA1abc\x80\xA1",
    "abc\xFD\xFE\xFFab\xFD\xFE\xFF",
];
foreach ($sjisVariants as $variant) {
    foreach ($regressionInputs as $input) {
        $hexChunks = array_map('bin2hex', mb_str_split($input, 2, $variant));
        echo "$variant: [", implode(', ', $hexChunks), "]\n";
    }
}

?>
--EXPECT--
BIG-5: a4e9 a5bb
EUC-JP: c6fc cbdc
ISO-2022-JP: 1b2442467c1b2842 1b24424b5c1b2842
SJIS: 93fa 967b
UTF-16BE: 65e5 672c
UTF-16LE: e565 2c67
UTF-32BE: 000065e5 0000672c
UTF-32LE: e5650000 2c670000
UTF-8: e697a5 e69cac
== Regression test for SJIS byte 0x80 ==
SJIS: [80a1, 6162, 6380, a1]
SJIS: [6162, 63fd, feff, 6162, fdfe, ff]
SJIS-2004: [80a1, 6162, 6380, a1]
SJIS-2004: [6162, 63fd, feff, 6162, fdfe, ff]
MacJapanese: [80a1, 6162, 6380, a1]
MacJapanese: [6162, 63fd, feff, 6162, fdfe, ff]
SJIS-Mobile#DOCOMO: [80a1, 6162, 6380, a1]
SJIS-Mobile#DOCOMO: [6162, 63fd, feff, 6162, fdfe, ff]
SJIS-Mobile#KDDI: [80a1, 6162, 6380, a1]
SJIS-Mobile#KDDI: [6162, 63fd, feff, 6162, fdfe, ff]
SJIS-Mobile#SoftBank: [80a1, 6162, 6380, a1]
SJIS-Mobile#SoftBank: [6162, 63fd, feff, 6162, fdfe, ff]