--TEST--
GH-10192 (mb_detect_encoding() results for UTF-7 differ between PHP 8.0 and 8.1)
--EXTENSIONS--
mbstring
--FILE--
<?php

// Maps a case title (echoed verbatim into the output) to the byte string that
// is fed to the UTF-7 detection, validation and conversion functions below.
$inputsByTitle = [
    // '+' and '-' next to ordinary characters.
    'non-base64 character after +' => 'A + B',
    'non-base64 character after -' => 'A - B',
    'base64 character before +' => 'A 1+ B',
    'base64 character before -' => 'A 1- B',
    'base64 character after +' => 'A +1 B',
    'base64 character after -' => 'A -1 B',
    'base64 character before and after +' => 'A 1+1 B',
    'base64 character before and after -' => 'A 1-1 B',
    'string ends with +' => 'A +',
    'string ends with -' => 'A -',
    '+ and -' => 'A +- B',
    '- and +' => 'A -+ B',

    // Characters written directly, outside and right after a '+'.
    'valid direct encoding character =' => 'A = B',
    'invalid direct encoding character ~' => 'A ~ B',
    'invalid direct encoding character \\' => 'A \\ B',
    'invalid direct encoding character ESC' => "A \x1b B",
    'valid direct encoding character = after +' => 'A += B',
    'invalid direct encoding character ~ after +' => 'A +~ B',
    'invalid direct encoding character \\ after +' => 'A +\\ B',
    'invalid direct encoding character ESC after +' => "A +\x1b B",

    // 'ZeVnLIqe' is 日本語 in base64-encoded UTF-16BE; 'ZeVnLIq' is the same
    // payload with its final base64 character removed.
    'valid base64 character between + and -' => 'A +ZeVnLIqe- B',
    'invalid base64 character between + and -' => 'A +ZeVnLIq- B',
    'valid base64 character between + and non-base64 character' => 'A +ZeVnLIqe B',
    'invalid base64 character between + and non-base64 character' => 'A +ZeVnLIq B',
    'valid base64 character between + and base64 character' => 'A +ZeVnLIqe1 B',
    'invalid base64 character between + and base64 character' => 'A +ZeVnLIq1 B',
    'valid base64 character between + and end of string' => 'A +ZeVnLIqe',
    'invalid base64 character between + and end of string' => 'A +ZeVnLIq',

    // Payloads made up solely of '+', which is itself a base64 character.
    // NOTE(review): each "valid"/"invalid" pair below uses the very same input
    // (and the expected output is identical too) - confirm whether the
    // "invalid" variants were meant to carry a different number of '+'.
    'valid base64 character consisting only of + between + and -' => 'A +++++++++- B',
    'invalid base64 character consisting only of + between + and -' => 'A +++++++++- B',
    'valid base64 character consisting only of + between + and non-base64 character' => 'A +++++++++ B',
    'invalid base64 character consisting only of + between + and non-base64 character' => 'A +++++++++ B',
    'valid base64 character consisting only of + between + and base64 character' => 'A +++++++++1 B',
    'invalid base64 character consisting only of + between + and base64 character' => 'A +++++++++1 B',
    'valid base64 character consisting only of + between + and end of string' => 'A +++++++++',
    'invalid base64 character consisting only of + between + and end of string' => 'A +++++++++',

    // '2GfePQ' is U+29E3D (𩸽) as a UTF-16BE surrogate pair (D867 DE3D);
    // '2Gc' carries only the first 16 bits, i.e. the high surrogate.
    'valid base64 character using surrogate pair between + and -' => 'A +2GfePQ- B',
    'first 16 bits of base64 character using surrogate pair between + and -' => 'A +2Gc- B',
    'valid base64 character using surrogate pair between + and non-base64 character' => 'A +2GfePQ B',
    'first 16 bits of base64 character using surrogate pair between + and non-base64 character' => 'A +2Gc B',
    'valid base64 character using surrogate pair between + and base64 character' => 'A +2GfePQ1 B',
    'first 16 bits of base64 character using surrogate pair between + and base64 character' => 'A +2Gc1 B',
    'valid base64 character using surrogate pair between + and end of string' => 'A +2GfePQ',
    'first 16 bits of base64 character using surrogate pair between + and end of string' => 'A +2Gc',

    // '3j3YZw' is the same pair with its halves swapped (DE3D D867);
    // '3j0' carries only the last 16 bits, i.e. the low surrogate.
    'invalid base64 character using surrogate pair in reverse order between + and -' => 'A +3j3YZw- B',
    'last 16 bits of base64 character using surrogate pair in reverse order between + and -' => 'A +3j0- B',
    'invalid base64 character using surrogate pair in reverse order between + and non-base64 character' => 'A +3j3YZw B',
    'last 16 bits of base64 character using surrogate pair in reverse order between + and non-base64 character' => 'A +3j0 B',
    'invalid base64 character using surrogate pair in reverse order between + and base64 character' => 'A +3j3YZw1 B',
    'last 16 bits of base64 character using surrogate pair in reverse order between + and base64 character' => 'A +3j01 B',
    'invalid base64 character using surrogate pair in reverse order between + and end of string' => 'A +3j3YZw',
    'last 16 bits of base64 character using surrogate pair in reverse order between + and end of string' => 'A +3j0',
];

foreach ($inputsByTitle as $label => $bytes) {
    echo $label, PHP_EOL;

    // Detection runs first with UTF-8 listed ahead of UTF-7, then with UTF-7
    // as the only candidate; for each list, strict mode precedes non-strict.
    foreach (['UTF-8, UTF-7', 'UTF-7'] as $candidates) {
        foreach ([true, false] as $strict) {
            var_dump(mb_detect_encoding($bytes, $candidates, $strict));
        }
    }

    var_dump(mb_check_encoding($bytes, 'UTF-7'));

    // Bytes 0x00-0x1F and 0x7F are escaped so that control characters such as
    // ESC show up in the output as octal escapes (\033).
    $asUtf8 = mb_convert_encoding($bytes, 'UTF-8', 'UTF-7');
    var_dump(addcslashes($asUtf8, "\0..\37\177"));

    // Dumped after every case; the expected values only ever grow from one
    // case to the next, so the order of the cases above matters.
    var_dump(mb_get_info('illegal_chars'));
    echo PHP_EOL;
}
?>
--EXPECT--
non-base64 character after +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(4) "A  B"
int(0)

non-base64 character after -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(5) "A - B"
int(0)

base64 character before +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A 1 B"
int(0)

base64 character before -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(6) "A 1- B"
int(0)

base64 character after +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ? B"
int(1)

base64 character after -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(6) "A -1 B"
int(1)

base64 character before and after +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(6) "A 1? B"
int(2)

base64 character before and after -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(7) "A 1-1 B"
int(2)

string ends with +
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(2) "A "
int(2)

string ends with -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(3) "A -"
int(2)

+ and -
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(5) "A + B"
int(2)

- and +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A - B"
int(2)

valid direct encoding character =
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(5) "A = B"
int(2)

invalid direct encoding character ~
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ~ B"
int(2)

invalid direct encoding character \
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A \ B"
int(2)

invalid direct encoding character ESC
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(8) "A \033 B"
int(2)

valid direct encoding character = after +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A = B"
int(2)

invalid direct encoding character ~ after +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ~ B"
int(2)

invalid direct encoding character \ after +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A \ B"
int(2)

invalid direct encoding character ESC after +
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(8) "A \033 B"
int(2)

valid base64 character between + and -
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(13) "A 日本語 B"
int(2)

invalid base64 character between + and -
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(11) "A 日本? B"
int(3)

valid base64 character between + and non-base64 character
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(13) "A 日本語 B"
int(3)

invalid base64 character between + and non-base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(11) "A 日本? B"
int(4)

valid base64 character between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(14) "A 日本語? B"
int(5)

invalid base64 character between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(13) "A 日本誵 B"
int(5)

valid base64 character between + and end of string
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(11) "A 日本語"
int(5)

invalid base64 character between + and end of string
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(9) "A 日本?"
int(6)

valid base64 character consisting only of + between + and -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(13) "A ﯯ뻻 B"
int(6)

invalid base64 character consisting only of + between + and -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(13) "A ﯯ뻻 B"
int(6)

valid base64 character consisting only of + between + and non-base64 character
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(13) "A ﯯ뻻 B"
int(6)

invalid base64 character consisting only of + between + and non-base64 character
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(13) "A ﯯ뻻 B"
int(6)

valid base64 character consisting only of + between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(14) "A ﯯ뻻? B"
int(7)

invalid base64 character consisting only of + between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(14) "A ﯯ뻻? B"
int(8)

valid base64 character consisting only of + between + and end of string
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(11) "A ﯯ뻻"
int(8)

invalid base64 character consisting only of + between + and end of string
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(11) "A ﯯ뻻"
int(8)

valid base64 character using surrogate pair between + and -
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(8) "A 𩸽 B"
int(8)

first 16 bits of base64 character using surrogate pair between + and -
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ? B"
int(9)

valid base64 character using surrogate pair between + and non-base64 character
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(8) "A 𩸽 B"
int(9)

first 16 bits of base64 character using surrogate pair between + and non-base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ? B"
int(10)

valid base64 character using surrogate pair between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(9) "A 𩸽? B"
int(11)

first 16 bits of base64 character using surrogate pair between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ? B"
int(12)

valid base64 character using surrogate pair between + and end of string
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-7"
string(5) "UTF-7"
bool(true)
string(6) "A 𩸽"
int(12)

first 16 bits of base64 character using surrogate pair between + and end of string
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(3) "A ?"
int(13)

invalid base64 character using surrogate pair in reverse order between + and -
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(6) "A ?? B"
int(15)

last 16 bits of base64 character using surrogate pair in reverse order between + and -
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ? B"
int(16)

invalid base64 character using surrogate pair in reverse order between + and non-base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(6) "A ?? B"
int(18)

last 16 bits of base64 character using surrogate pair in reverse order between + and non-base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(5) "A ? B"
int(19)

invalid base64 character using surrogate pair in reverse order between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(6) "A ?? B"
int(21)

last 16 bits of base64 character using surrogate pair in reverse order between + and base64 character
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(6) "A ?? B"
int(23)

invalid base64 character using surrogate pair in reverse order between + and end of string
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(4) "A ??"
int(25)

last 16 bits of base64 character using surrogate pair in reverse order between + and end of string
string(5) "UTF-8"
string(5) "UTF-8"
bool(false)
string(5) "UTF-7"
bool(false)
string(3) "A ?"
int(26)