xref: /php-src/ext/mbstring/tests/mb_strpos.phpt (revision 7f445595)
--TEST--
mb_strpos()
--EXTENSIONS--
mbstring
--FILE--
<?php
// Exercises mb_strpos() on an EUC-JP haystack with and without the explicit
// offset/encoding arguments, and on haystacks/needles containing invalid
// byte sequences.
// TODO: Add more encodings

ini_set('include_path','.');
include_once('common.inc');


// Test string (raw EUC-JP bytes, 43 characters long).
// Per the expected output below: "0123" occurs at character offsets 0 and 30,
// and the multibyte needle "\xC6\xFC\xCB\xDC\xB8\xEC" at offsets 10 and 34.
$euc_jp = "0123\xA4\xB3\xA4\xCE\xCA\xB8\xBB\xFA\xCE\xF3\xA4\xCF\xC6\xFC\xCB\xDC\xB8\xEC\xA4\xC7\xA4\xB9\xA1\xA3EUC-JP\xA4\xF2\xBB\xC8\xA4\xC3\xA4\xC6\xA4\xA4\xA4\xDE\xA4\xB9\xA1\xA30123\xC6\xFC\xCB\xDC\xB8\xEC\xA4\xCF\xCC\xCC\xC5\xDD\xBD\xAD\xA4\xA4\xA1\xA3";

$slen = mb_strlen($euc_jp, 'EUC-JP');
echo "String len: $slen\n";

// EUC-JP - With encoding parameter
// The internal encoding is set to something other than EUC-JP here,
// presumably so that only the explicit 'EUC-JP' argument can produce the
// expected offsets.
mb_internal_encoding('UTF-8') or print("mb_internal_encoding() failed\n");

echo  "== POSITIVE OFFSET ==\n";

// Integer needles (3, 0) are passed on purpose; without strict_types they are
// coerced to the strings "3" and "0".
print  mb_strpos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", 0, 'EUC-JP') . "\n";
print  mb_strpos($euc_jp, '0', 0,     'EUC-JP') . "\n";
print  mb_strpos($euc_jp, 3, 0,       'EUC-JP') . "\n";
print  mb_strpos($euc_jp, 0, 0,       'EUC-JP') . "\n";
// Offset 15 skips the first occurrence of every needle, so the second
// occurrences (in the trailing "0123" + multibyte run) must be reported.
print  mb_strpos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", 15, 'EUC-JP') . "\n";
print  mb_strpos($euc_jp, '0', 15,     'EUC-JP') . "\n";
print  mb_strpos($euc_jp, 3, 15,       'EUC-JP') . "\n";
print  mb_strpos($euc_jp, 0, 15,       'EUC-JP') . "\n";


// Negative offset
echo "== NEGATIVE OFFSET ==\n";

// -15 counts back from the end of the 43-character string, i.e. the search
// starts at character offset 28.
print mb_strpos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", -15, 'EUC-JP') . "\n";
print mb_strpos($euc_jp, '0', -15,     'EUC-JP') . "\n";
print mb_strpos($euc_jp, 3, -15,       'EUC-JP') . "\n";
print mb_strpos($euc_jp, 0, -15,       'EUC-JP') . "\n";
// -43 is exactly minus the string length: the search starts at offset 0.
print mb_strpos($euc_jp, 0, -43,       'EUC-JP') . "\n";


// Non-existent
echo "== NON-EXISTENT ==\n";

// A needle that is absent from the haystack must yield boolean false
// (checked with ===, since a match at offset 0 is also falsy).
$r = mb_strpos($euc_jp, "\xB4\xDA\xB9\xF1\xB8\xEC", 0, 'EUC-JP');
($r === FALSE) ? print "OK_STR\n"     : print "NG_STR\n";
$r = mb_strpos($euc_jp, "\n",     0, 'EUC-JP');
($r === FALSE) ? print "OK_NEWLINE\n" : print "NG_NEWLINE\n";


// EUC-JP - No encoding parameter
echo "== NO ENCODING PARAMETER ==\n";

// With the encoding argument omitted, the internal encoding set here is used.
mb_internal_encoding('EUC-JP')  or print("mb_internal_encoding() failed\n");

print  mb_strpos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", 0) . "\n";
print  mb_strpos($euc_jp, '0', 0) . "\n";
print  mb_strpos($euc_jp, 3, 0) . "\n";
print  mb_strpos($euc_jp, 0, 0) . "\n";

$r = mb_strpos($euc_jp, "\xB4\xDA\xB9\xF1\xB8\xEC", 0);
($r === FALSE) ? print "OK_STR\n"     : print "NG_STR\n";
$r = mb_strpos($euc_jp, "\n", 0);
($r === FALSE) ? print "OK_NEWLINE\n" : print "NG_NEWLINE\n";

// EUC-JP - No offset and encoding parameter
echo "== NO OFFSET AND ENCODING PARAMETER ==\n";

mb_internal_encoding('EUC-JP')  or print("mb_internal_encoding() failed\n");

print  mb_strpos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC") . "\n";
print  mb_strpos($euc_jp, '0') . "\n";
print  mb_strpos($euc_jp, 3) . "\n";
print  mb_strpos($euc_jp, 0) . "\n";

$r = mb_strpos($euc_jp, "\xB4\xDA\xB9\xF1\xB8\xEC");
($r === FALSE) ? print "OK_STR\n"     : print "NG_STR\n";
$r = mb_strpos($euc_jp, "\n");
($r === FALSE) ? print "OK_NEWLINE\n" : print "NG_NEWLINE\n";

echo "== INVALID STRINGS ==\n";

// Previously, mb_strpos would internally convert invalid byte sequences to '?'
// BEFORE performing the search.
// (This was regardless of the setting of mb_substitute_char.)
// So invalid byte sequences would match '?', both from haystack to needle
// and from needle to haystack.

var_dump(mb_strpos("abc??", "\xFF", 0, "UTF-8")); // should be false
var_dump(mb_strpos("abc\xFF", "?", 0, "UTF-8")); // should be false

// However, invalid byte sequences can still match other invalid byte
// sequences for non-UTF-8 encodings only:
var_dump(mb_strpos("\x00a\x00b\x00c\xDF\xFF", "\xDB\x00", 0, "UTF-16BE"));

// For UTF-8, invalid byte sequences match the exact same invalid sequence,
// but not a different one
var_dump(mb_strpos("abc\x80\x80", "\xFF", 0, "UTF-8")); // should be false
var_dump(mb_strpos("abc\xFF", "c\x80", 0, "UTF-8")); // should be false

var_dump(mb_strpos("abc\x80\x80", "\x80", 0, "UTF-8"));
var_dump(mb_strpos("abc\xFF", "c\xFF", 0, "UTF-8"));

?>
--EXPECT--
String len: 43
== POSITIVE OFFSET ==
10
0
3
0
34
30
33
30
== NEGATIVE OFFSET ==
34
30
33
30
0
== NON-EXISTENT ==
OK_STR
OK_NEWLINE
== NO ENCODING PARAMETER ==
10
0
3
0
OK_STR
OK_NEWLINE
== NO OFFSET AND ENCODING PARAMETER ==
10
0
3
0
OK_STR
OK_NEWLINE
== INVALID STRINGS ==
bool(false)
bool(false)
int(3)
bool(false)
bool(false)
int(3)
int(2)
149