--TEST-- mb_substr() --EXTENSIONS-- mbstring --FILE-- "; $pos = mb_strpos($data, "<", 0, "UTF-8"); $out = mb_substr($data, 0, $pos, "UTF-8"); print $out . "\n"; echo "Regression:\n"; /* During development, one >= comparison in mb_get_substr was wrongly written as > * This was caught by libFuzzer */ $str = "\xbd\xbd\xbd\xbd\xbd\xbd\xbd\xbe\xbd\xbd\xbd\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x89\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x00\x00\x00\x00\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8b\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8f\x8b\x8b\x8b\xbd\xbd\xbd\xbd\xbd\xbd\xbd\xbe\x01:O\xaa\xd3"; echo bin2hex(mb_substr($str, 0, 128, "JIS")), "\n"; /* Alex messed up when reimplementing mb_substr and, in cases where `from` is non-zero and * the number of characters to extract is more than 128, miscalculated where to end the substring * Thanks to Maurício Fauth for finding the issue */ var_dump(mb_substr('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum dapibus feugiat ex non cursus. Pellentesque vestibulum tellus sit lectus.', 19, -1)); ?> --EXPECT-- EUC-JP: 1: c6fccbdcb8eca4c7a4b9a1a34555432d 2: 30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3 3 OK 4 OK: 30313233a4b3a4cecab8bbfacef3a4cf SJIS: 1: 93fa967b8cea 2: 8142 3: 825582568257 4: 967b8cea8365834c8358836782c582b781423031323334825482558256825782588142 5: -- Testing illegal SJIS byte 0x80 -- 633f 3f6162 SJIS-2004: 633f 3f6162 MacJapanese: 6380 806162 SJIS-Mobile#DOCOMO: 633f 3f6162 SJIS-Mobile#KDDI: 633f 3f6162 SJIS-Mobile#SoftBank: 633f 3f6162 -- Testing MacJapanese characters which map to 3-5 codepoints each -- 616263 3f3f 58 616263 3f3f 78 ISO-2022-JP: 1: 1b2442212121721b284241 2: 43 3: 1b2442212121721b284241 4: 4142 5: 1b244221721b2842414243 6: 7: GB-18030: 1: c3dcc2ebd3c3 2: b3c6 3: c2ebc3fbb3c6 4: c2ebd3c3bba7c3fbc3dcc2ebc3fbb3c6c3fbb3c6 5: HZ: 1: The 2: . 3: ~{!#~}By 4: he next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye. 5: UTF-8: 1: Gre 2: ь 3: йте 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь 5: UTF-32: 1: Gre 2: ь 3: йте 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь 5: UTF-7: 1: Gre 2: ь 3: йте 4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь 5: Testing agreement with mb_strpos on invalid UTF-8 string: ?AAA Regression: 1b28493d3d3d3d3d3d3d3e3d3d3d1b28423f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f000000003f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f1b28493d3d3d3d3d3d3d3e1b2842013a4f1b28492a1b2842 string(121) "it amet, consectetur adipiscing elit. Vestibulum dapibus feugiat ex non cursus. Pellentesque vestibulum tellus sit lectus"