1--TEST--
2mb_str_split() tests for the Japanese language
3--EXTENSIONS--
4mbstring
5--INI--
6output_handler=
7--FILE--
8<?php
9ini_set('include_path','.'); /* resolve the include below relative to the current directory */
10include_once('common.inc'); /* NOTE(review): common.inc is not visible here; presumably shared test helpers - confirm */
11
12$string = "日本";             /* sample text, 2 chars; treated as UTF-8 by the conversions below */
13$len = 2; /* character count of $string; upper bound for the chunk lengths and indexes used below */
14$charset = [ /* target encodings; each one gets a line of hex output in --EXPECT-- */
15    "BIG-5",
16    "EUC-JP",
17    "ISO-2022-JP",
18    "SJIS",
19    "UTF-16BE",
20    "UTF-16LE",
21    "UTF-32BE",
22    "UTF-32LE",
23    "UTF-8"
24];
25
26
27foreach($charset as $cs){ /* run the same two checks once per target encoding */
28    $enc = mb_convert_encoding($string, $cs, "UTF-8"); /* re-encode the UTF-8 sample into $cs */
29    $split = mb_str_split($enc, 1, $cs); /* one character per chunk; dumped as hex further down */
30
31    /* check chunks number: splitting into chunks of $i chars must yield ceil($len / $i) chunks */
32    for($i = 1; $i <= $len; ++$i){
33        $ceil = ceil($len / $i); /* expected chunk count; ceil() yields a float */
34        $cnt = count(mb_str_split($enc,$i,$cs)); /* actual chunk count, an int */
35        if($ceil != $cnt){ /* loose comparison of float against int; prints only on mismatch */
36          echo "$cs WRONG CHUNKS NUMBER: expected/actual: $ceil/$cnt\n";
37        }
38    }
39
40    /* check content: print "<encoding>:" followed by each single-character chunk as hex */
41    echo "$cs:";
42    for($i = 0; $i < $len; ++$i){
43        echo  " " . unpack("H*", $split[$i])[1]; /* unpack() returns an array; element 1 is the hex string */
44    }
45    echo "\n";
46}
47
48/* long string test: split a long ISO-2022-JP string into chunks of $len characters */
49$size = 50000; /* number of repetitions of $string, and the expected number of chunks */
50$long = str_repeat($string, $size); /* 50k x 2 chars = 1e5 chars */
51$enc = mb_convert_encoding($long, "ISO-2022-JP", "UTF-8");
52$array = mb_str_split($enc, $len, "ISO-2022-JP"); /* $len chars per chunk, i.e. one copy of $string each */
53$count = count($array);
54
55/* check array size: there must be exactly one chunk per repetition of $string */
56if($size !== $count) printf("Long string splitting error: actual array size: %d expected: %d\n", $count, $size);
57
58/* compare initial string and last array element after splitting: the final chunk must be identical to $string encoded on its own */
59$enc = mb_convert_encoding($string, "ISO-2022-JP", "UTF-8"); /* $enc is reused: now holds just the 2-char sample */
60if(end($array) !== $enc){ /* prints only on mismatch; --EXPECT-- contains no line from this check */
61    printf("Long string splitting error:
62        last array element: %s expected: %s\n", unpack("H*", end($array))[1],unpack("H*", $enc)[1]); /* both values are shown as hex */
63}
64
65/* SJIS byte 0x80 was previously wrongly treated as the starting byte for a 2-byte character */
66echo "== Regression test for SJIS byte 0x80 ==\n";
67foreach (['SJIS', 'SJIS-2004', 'MacJapanese', 'SJIS-Mobile#DOCOMO', 'SJIS-Mobile#KDDI', 'SJIS-Mobile#SoftBank'] as $encoding) { /* the same two inputs are split under each listed encoding */
68    $array = mb_str_split("\x80\xA1abc\x80\xA1", 2, $encoding); /* chunks of 2 characters; the expected output shows 0x80 and 0xA1 each counted as one character */
69    echo "$encoding: [" . implode(', ', array_map('bin2hex', $array)) . "]\n"; /* print the chunks as comma-separated hex */
70
71    // Also try bytes 0xFD, 0xFE, and 0xFF; the expected output shows each of them counted as a single character
72    $array = mb_str_split("abc\xFD\xFE\xFFab\xFD\xFE\xFF", 2, $encoding);
73    echo "$encoding: [" . implode(', ', array_map('bin2hex', $array)) . "]\n"; /* same hex dump format as above */
74}
75
76?>
77--EXPECT--
78BIG-5: a4e9 a5bb
79EUC-JP: c6fc cbdc
80ISO-2022-JP: 1b2442467c1b2842 1b24424b5c1b2842
81SJIS: 93fa 967b
82UTF-16BE: 65e5 672c
83UTF-16LE: e565 2c67
84UTF-32BE: 000065e5 0000672c
85UTF-32LE: e5650000 2c670000
86UTF-8: e697a5 e69cac
87== Regression test for SJIS byte 0x80 ==
88SJIS: [80a1, 6162, 6380, a1]
89SJIS: [6162, 63fd, feff, 6162, fdfe, ff]
90SJIS-2004: [80a1, 6162, 6380, a1]
91SJIS-2004: [6162, 63fd, feff, 6162, fdfe, ff]
92MacJapanese: [80a1, 6162, 6380, a1]
93MacJapanese: [6162, 63fd, feff, 6162, fdfe, ff]
94SJIS-Mobile#DOCOMO: [80a1, 6162, 6380, a1]
95SJIS-Mobile#DOCOMO: [6162, 63fd, feff, 6162, fdfe, ff]
96SJIS-Mobile#KDDI: [80a1, 6162, 6380, a1]
97SJIS-Mobile#KDDI: [6162, 63fd, feff, 6162, fdfe, ff]
98SJIS-Mobile#SoftBank: [80a1, 6162, 6380, a1]
99SJIS-Mobile#SoftBank: [6162, 63fd, feff, 6162, fdfe, ff]
100