--TEST--
mb_strpos()
--EXTENSIONS--
mbstring
--FILE--
<?php
// TODO: Add more encodings

ini_set('include_path', '.');
include_once 'common.inc';

// Haystack: an EUC-JP byte string. The ASCII digits "0123" and the six-byte
// sequence used as the first needle below each occur twice in it.
$euc_jp = "0123\xA4\xB3\xA4\xCE\xCA\xB8\xBB\xFA\xCE\xF3\xA4\xCF\xC6\xFC\xCB\xDC\xB8\xEC\xA4\xC7\xA4\xB9\xA1\xA3EUC-JP\xA4\xF2\xBB\xC8\xA4\xC3\xA4\xC6\xA4\xA4\xA4\xDE\xA4\xB9\xA1\xA30123\xC6\xFC\xCB\xDC\xB8\xEC\xA4\xCF\xCC\xCC\xC5\xDD\xBD\xAD\xA4\xA4\xA1\xA3";

// Needles that are present in the haystack. The last two are passed as
// integers, exactly as in the original test; this relies on non-strict scalar
// coercion, so declare(strict_types=1) must not be added to this file.
$present = ["\xC6\xFC\xCB\xDC\xB8\xEC", '0', 3, 0];

// Needles that are absent from the haystack, keyed by the label that is
// printed as OK_<label> / NG_<label>.
$absent = [
    'STR'     => "\xB4\xDA\xB9\xF1\xB8\xEC",
    'NEWLINE' => "\n",
];

echo 'String len: ', mb_strlen($euc_jp, 'EUC-JP'), "\n";

// The internal encoding is switched to UTF-8 while the haystack is EUC-JP, so
// the positions printed in the next three sections have to come from the
// explicit 'EUC-JP' argument.
if (!mb_internal_encoding('UTF-8')) {
    echo "mb_internal_encoding() failed\n";
}

echo "== POSITIVE OFFSET ==\n";

foreach ([0, 15] as $offset) {
    foreach ($present as $needle) {
        echo mb_strpos($euc_jp, $needle, $offset, 'EUC-JP'), "\n";
    }
}

echo "== NEGATIVE OFFSET ==\n";

foreach ($present as $needle) {
    echo mb_strpos($euc_jp, $needle, -15, 'EUC-JP'), "\n";
}
// A negative offset equal to the string length starts the search at the
// very first character.
echo mb_strpos($euc_jp, 0, -43, 'EUC-JP'), "\n";

echo "== NON-EXISTENT ==\n";

foreach ($absent as $label => $needle) {
    $found = mb_strpos($euc_jp, $needle, 0, 'EUC-JP');
    echo $found === false ? "OK_$label\n" : "NG_$label\n";
}

// From here on the encoding argument is omitted and the internal encoding
// (EUC-JP) is used instead.
echo "== NO ENCODING PARAMETER ==\n";

if (!mb_internal_encoding('EUC-JP')) {
    echo "mb_internal_encoding() failed\n";
}

foreach ($present as $needle) {
    echo mb_strpos($euc_jp, $needle, 0), "\n";
}

foreach ($absent as $label => $needle) {
    $found = mb_strpos($euc_jp, $needle, 0);
    echo $found === false ? "OK_$label\n" : "NG_$label\n";
}

// Same checks again with both the offset and the encoding left at their
// defaults.
echo "== NO OFFSET AND ENCODING PARAMETER ==\n";

if (!mb_internal_encoding('EUC-JP')) {
    echo "mb_internal_encoding() failed\n";
}

foreach ($present as $needle) {
    echo mb_strpos($euc_jp, $needle), "\n";
}

foreach ($absent as $label => $needle) {
    $found = mb_strpos($euc_jp, $needle);
    echo $found === false ? "OK_$label\n" : "NG_$label\n";
}

echo "== INVALID STRINGS ==\n";

// Historical note: mb_strpos used to replace invalid byte sequences with '?'
// internally BEFORE searching, regardless of the mb_substitute_char setting.
// As a result an invalid byte sequence matched a literal '?', in both the
// haystack-to-needle and the needle-to-haystack direction.
//
// Each entry below is [haystack, needle, encoding]; the search always starts
// at offset 0.
$invalidCases = [
    // Invalid bytes and a literal '?' must no longer match each other.
    ["abc??", "\xFF", "UTF-8"],                              // should be false
    ["abc\xFF", "?", "UTF-8"],                               // should be false

    // For non-UTF-8 encodings only, an invalid byte sequence can still match
    // a different invalid byte sequence.
    ["\x00a\x00b\x00c\xDF\xFF", "\xDB\x00", "UTF-16BE"],

    // For UTF-8, an invalid byte sequence matches the exact same invalid
    // sequence, but not a different one.
    ["abc\x80\x80", "\xFF", "UTF-8"],                        // should be false
    ["abc\xFF", "c\x80", "UTF-8"],                           // should be false
    ["abc\x80\x80", "\x80", "UTF-8"],
    ["abc\xFF", "c\xFF", "UTF-8"],
];

foreach ($invalidCases as [$haystack, $needle, $encoding]) {
    var_dump(mb_strpos($haystack, $needle, 0, $encoding));
}

?>
--EXPECT--
String len: 43
== POSITIVE OFFSET ==
10
0
3
0
34
30
33
30
== NEGATIVE OFFSET ==
34
30
33
30
0
== NON-EXISTENT ==
OK_STR
OK_NEWLINE
== NO ENCODING PARAMETER ==
10
0
3
0
OK_STR
OK_NEWLINE
== NO OFFSET AND ENCODING PARAMETER ==
10
0
3
0
OK_STR
OK_NEWLINE
== INVALID STRINGS ==
bool(false)
bool(false)
int(3)
bool(false)
bool(false)
int(3)
int(2)