--TEST--
Test mb_ereg() function : usage variations - match special characters
--SKIPIF--
<?php
if (!extension_loaded('mbstring')) {
    die('skip');
}
if (!function_exists('mb_ereg')) {
    die("skip mb_ereg() is not available in this build");
}
?>
--FILE--
<?php
/* Prototype  : int mb_ereg(string $pattern, string $string [, array $registers])
 * Description: Regular expression match for multibyte string
 * Source code: ext/mbstring/php_mbregex.c
 */

/*
 * Run each special-character pattern (\w+, \W+, \s+, \S+, \d+, \D+, \b, \B)
 * through mb_ereg() against one ASCII subject and one multibyte subject,
 * printing the return value and the captured registers for each call.
 */

/**
 * Print a var_dump()-like listing of a register array in which every string
 * value is base64 encoded before being dumped; non-string values are dumped
 * unchanged.
 *
 * Any falsy argument (for example null or an empty array) is reported as
 * the single line "NULL".
 *
 * @param array|null $regs registers filled in by mb_ereg()
 */
function base64_encode_var_dump($regs) {
    if (!$regs) {
        echo "NULL\n";
        return;
    }

    echo "array(" . count($regs) . ") {\n";
    foreach ($regs as $index => $entry) {
        echo " [$index]=>\n ";
        var_dump(is_string($entry) ? base64_encode($entry) : $entry);
    }
    echo "}\n";
}

echo "*** Testing mb_ereg() : usage variations ***\n";

echo mb_regex_encoding('utf-8')
    ? "Regex encoding set to utf-8\n"
    : "Could not set regex encoding to utf-8\n";

// Key: the text shown in the output; value: the pattern handed to mb_ereg().
$patterns = array(
    '\w+' => b'\w+',
    '\W+' => b'\W+',
    '\s+' => b'\s+',
    '\S+' => b'\S+',
    '\d+' => b'\d+',
    '\D+' => b'\D+',
    '\b'  => b'\b',
    '\B'  => b'\B'
);

$subject_ascii = b'This is an English string. 0123456789.';
// The multibyte subject is kept base64 encoded so this file stays pure ASCII.
$subject_mb = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');

foreach ($patterns as $label => $pattern) {
    echo "\n--** Pattern is: $label **--\n";

    // Clear both register variables once either holds something from the
    // previous pattern -- presumably so a call that does not match cannot
    // show stale registers.
    if (!empty($regs_ascii) || !empty($regs_mb)) {
        $regs_ascii = null;
        $regs_mb = null;
    }

    echo "-- ASCII String: --\n";
    $result_ascii = mb_ereg($pattern, $subject_ascii, $regs_ascii);
    var_dump($result_ascii);
    base64_encode_var_dump($regs_ascii);

    echo "-- Multibyte String: --\n";
    $result_mb = mb_ereg($pattern, $subject_mb, $regs_mb);
    var_dump($result_mb);
    base64_encode_var_dump($regs_mb);

}

echo "Done";

?>
--EXPECT--
*** Testing mb_ereg() : usage variations ***
Regex encoding set to utf-8

--** Pattern is: \w+ **--
-- ASCII String: --
int(4)
array(1) {
 [0]=>
 string(8) "VGhpcw=="
}
-- Multibyte String: --
int(27)
array(1) {
 [0]=>
 string(36) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ"
}

--** Pattern is: \W+ **--
-- ASCII String: --
int(1)
array(1) {
 [0]=>
 string(4) "IA=="
}
-- Multibyte String: --
int(3)
array(1) {
 [0]=>
 string(4) "44CC"
}

--** Pattern is: \s+ **--
-- ASCII String: --
int(1)
array(1) {
 [0]=>
 string(4) "IA=="
}
-- Multibyte String: --
bool(false)
NULL

--** Pattern is: \S+ **--
-- ASCII String: --
int(4)
array(1) {
 [0]=>
 string(8) "VGhpcw=="
}
-- Multibyte String: --
int(53)
array(1) {
 [0]=>
 string(72) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII="
}

--** Pattern is: \d+ **--
-- ASCII String: --
int(10)
array(1) {
 [0]=>
 string(16) "MDEyMzQ1Njc4OQ=="
}
-- Multibyte String: --
int(20)
array(1) {
 [0]=>
 string(28) "MDEyMzTvvJXvvJbvvJfvvJjvvJk="
}

--** Pattern is: \D+ **--
-- ASCII String: --
int(27)
array(1) {
 [0]=>
 string(36) "VGhpcyBpcyBhbiBFbmdsaXNoIHN0cmluZy4g"
}
-- Multibyte String: --
int(30)
array(1) {
 [0]=>
 string(40) "5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CC"
}

--** Pattern is: \b **--
-- ASCII String: --
int(1)
array(1) {
 [0]=>
 bool(false)
}
-- Multibyte String: --
int(1)
array(1) {
 [0]=>
 bool(false)
}

--** Pattern is: \B **--
-- ASCII String: --
int(1)
array(1) {
 [0]=>
 bool(false)
}
-- Multibyte String: --
int(1)
array(1) {
 [0]=>
 bool(false)
}
Done