--TEST--
mb_str_split() tests for the japanese language
--SKIPIF--
<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
--INI--
output_handler=
mbstring.func_overload=0
--FILE--
<?php
/*
 * Exercises mb_str_split() on a two-character Japanese string:
 *   1. for every listed encoding, the number of chunks produced for each
 *      chunk length 1..$len must equal ceil($len / chunk length);
 *   2. for every listed encoding, the bytes of each one-character chunk are
 *      dumped as hex and compared against the EXPECT section;
 *   3. a 100000-character ISO-2022-JP string is split into two-character
 *      chunks, and both the chunk count and the last chunk are verified.
 * Nothing is printed by checks 1 and 3 unless they fail.
 */
ini_set('include_path','.');
include_once('common.inc');

$string = "日本"; /* 2 chars */
$len = 2;
$charset = [
    "BIG-5",
    "EUC-JP",
    "ISO-2022-JP",
    "SJIS",
    "UTF-16BE",
    "UTF-16LE",
    "UTF-32BE",
    "UTF-32LE",
    "UTF-8"
];


foreach ($charset as $cs) {
    $enc = mb_convert_encoding($string, $cs, "UTF-8");
    $split = mb_str_split($enc, 1, $cs);

    /* check chunks number */
    for ($i = 1; $i <= $len; ++$i) {
        /* ceil() returns a float; cast it so the comparison with count() is a strict int check */
        $ceil = (int) ceil($len / $i);
        $cnt = count(mb_str_split($enc, $i, $cs));
        if ($ceil !== $cnt) {
            echo "$cs WRONG CHUNKS NUMBER: expected/actual: $ceil/$cnt\n";
        }
    }

    /* check content: dump every chunk actually returned, so missing or surplus chunks show up in the output */
    echo "$cs:";
    foreach ($split as $chunk) {
        echo " " . bin2hex($chunk);
    }
    echo "\n";
}

/* long string test */
$size = 50000;
$long = str_repeat($string, $size); /* 50k x 2 chars = 1e5 chars */
$enc = mb_convert_encoding($long, "ISO-2022-JP", "UTF-8");
$array = mb_str_split($enc, $len, "ISO-2022-JP");
$count = count($array);

/* check array size: chunks of $len chars must give back exactly $size elements */
if ($size !== $count) printf("Long string splitting error: actual array size: %d expected: %d\n", $count, $size);

/* compare initial string and last array element after splitting */
$enc = mb_convert_encoding($string, "ISO-2022-JP", "UTF-8");
if (end($array) !== $enc) {
    /* the line break inside the format string is part of the emitted message */
    printf("Long string splitting error:
        last array element: %s expected: %s\n", unpack("H*", end($array))[1],unpack("H*", $enc)[1]);
}

?>
--EXPECT--
BIG-5: a4e9 a5bb
EUC-JP: c6fc cbdc
ISO-2022-JP: 1b2442467c1b2842 1b24424b5c1b2842
SJIS: 93fa 967b
UTF-16BE: 65e5 672c
UTF-16LE: e565 2c67
UTF-32BE: 000065e5 0000672c
UTF-32LE: e5650000 2c670000
UTF-8: e697a5 e69cac