--TEST--
Bug #66121 - UTF-8 lookbehinds match bytes instead of characters
--FILE--
<?php
// Every pattern below is a negative lookbehind compiled with the /u modifier,
// so "the previous character" must be a whole UTF-8 character, not one byte.

// Sinhala characters (3 bytes each in UTF-8).
// The empty match after 'ක' is rejected by the lookbehind; only offset 0 matches.
var_dump(preg_replace('/(?<!ක)/u', '*', 'ක'));
// 'ම' is a different character, so both offset 0 and the end of the subject match.
var_dump(preg_replace('/(?<!ක)/u', '*', 'ම'));

// English characters: the single-byte equivalents of the two cases above.
var_dump(preg_replace('/(?<!k)/u', '*', 'k'));
var_dump(preg_replace('/(?<!k)/u', '*', 'm'));

// Sinhala characters: match offsets are reported in bytes, so the two empty
// matches are expected at 0 and 3 and never inside the 3-byte character.
preg_match_all('/(?<!ක)/u', 'ම', $matches, PREG_OFFSET_CAPTURE);
var_dump($matches);

// Invalid UTF-8: a stray 0xFC byte before or after a valid character makes the
// whole subject invalid, so preg_replace() yields NULL and preg_match_all()
// yields false instead of matching on raw bytes.
var_dump(preg_replace('/(?<!ක)/u', '*', "\xFCක"));
var_dump(preg_replace('/(?<!ක)/u', '*', "ක\xFC"));
var_dump(preg_match_all('/(?<!ක)/u', "\xFCම", $matches, PREG_OFFSET_CAPTURE));
// Trailing invalid byte, mirroring the preg_replace() pair above.
var_dump(preg_match_all('/(?<!ක)/u', "ම\xFC", $matches, PREG_OFFSET_CAPTURE));
?>
--EXPECT--
string(4) "*ක"
string(5) "*ම*"
string(2) "*k"
string(3) "*m*"
array(1) {
  [0]=>
  array(2) {
    [0]=>
    array(2) {
      [0]=>
      string(0) ""
      [1]=>
      int(0)
    }
    [1]=>
    array(2) {
      [0]=>
      string(0) ""
      [1]=>
      int(3)
    }
  }
}
NULL
NULL
bool(false)
bool(false)