--TEST--
mb_str_split() tests for the russian language
--SKIPIF--
<?php if (!extension_loaded('mbstring')) die('skip mbstring not available'); ?>
--INI--
output_handler=
mbstring.func_overload=0
--FILE--
<?php
/*
 * Exercises mb_str_split() on a short Cyrillic sample in several encodings:
 *   1. for every chunk length 1..N the number of chunks must be ceil(N / length);
 *   2. the one-character split is dumped as hex so it can be compared
 *      byte-for-byte with the EXPECT section;
 *   3. a long repeated string must split back into the original sample.
 * Only mismatches produce extra output, so a passing run prints just the
 * per-encoding hex dump.
 */
ini_set('include_path','.');
include_once('common.inc');

/* Sample text: three words of three letters, each followed by a space. */
$sample = "рай рай рай "; /* 12 chars */
$charCount = 12;
$encodings = [
    "EUC-JP",
    "CP866",
    "KOI8-R",
    "UTF-16BE",
    "UTF-16LE",
    "UTF-32BE",
    "UTF-32LE",
    "UTF-8"
];

foreach ($encodings as $cs) {
    $encoded = mb_convert_encoding($sample, $cs, "UTF-8");
    $chars = mb_str_split($encoded, 1, $cs);

    /* Chunk count: every possible chunk length must yield ceil(N / length) pieces. */
    foreach (range(1, $charCount) as $chunkLen) {
        $ceil = ceil($charCount / $chunkLen);
        $cnt = count(mb_str_split($encoded, $chunkLen, $cs));
        /* Loose comparison on purpose: ceil() yields a float, count() an int. */
        if ($ceil != $cnt) {
            echo "$cs WRONG CHUNKS NUMBER: expected/actual: $ceil/$cnt\n";
        }
    }

    /* Chunk content: hex dump of each single-character chunk. */
    echo "$cs:";
    foreach (range(0, $charCount - 1) as $pos) {
        $hex = unpack("H*", $chars[$pos]);
        echo " " . $hex[1];
    }
    echo "\n";
}

/* Long string: 25k repetitions x 12 chars = 3e5 chars, split into 12-char chunks. */
$size = 25000;
$expected = mb_convert_encoding($sample, "EUC-JP", "UTF-8");
$longEncoded = mb_convert_encoding(str_repeat($sample, $size), "EUC-JP", "UTF-8");
$chunks = mb_str_split($longEncoded, $charCount, "EUC-JP");
$count = count($chunks);

/* There must be exactly one chunk per repetition. */
if ($size !== $count) {
    printf("Long string splitting error: actual array size: %d expected: %d\n", $count, $size);
}

/* The final chunk must be identical to the encoded sample itself. */
$last = end($chunks);
if ($last !== $expected) {
    printf("Long string splitting error:
        last array element: %s expected: %s\n", unpack("H*", $last)[1], unpack("H*", $expected)[1]);
}

?>
--EXPECT--
EUC-JP: a7e2 a7d1 a7db 20 a7e2 a7d1 a7db 20 a7e2 a7d1 a7db 20
CP866: e0 a0 a9 20 e0 a0 a9 20 e0 a0 a9 20
KOI8-R: d2 c1 ca 20 d2 c1 ca 20 d2 c1 ca 20
UTF-16BE: 0440 0430 0439 0020 0440 0430 0439 0020 0440 0430 0439 0020
UTF-16LE: 4004 3004 3904 2000 4004 3004 3904 2000 4004 3004 3904 2000
UTF-32BE: 00000440 00000430 00000439 00000020 00000440 00000430 00000439 00000020 00000440 00000430 00000439 00000020
UTF-32LE: 40040000 30040000 39040000 20000000 40040000 30040000 39040000 20000000 40040000 30040000 39040000 20000000
UTF-8: d180 d0b0 d0b9 20 d180 d0b0 d0b9 20 d180 d0b0 d0b9 20