xref: /PHP-7.4/ext/dom/tests/bug76738.phpt (revision 441b6a6f)
1--TEST--
2Bug #76738 Wrong handling of output buffer
3--SKIPIF--
<?php
// Skip (rather than fail) when an extension the test body relies on is missing.
// skipif.inc is the shared skip check shipped next to this test (contents not shown here).
require_once('skipif.inc');
if (!extension_loaded('mbstring')) die('skip mbstring extension not available');
// The test body calls utf8_encode()/utf8_decode(); before PHP 8.0 these are
// only defined when the xml extension is loaded.
if (!function_exists('utf8_encode')) die('skip xml extension not available');
?>
8--FILE--
9<?php declare(strict_types=1);
10$test_string = base64_decode('PGRpdiBjbGFzcz0idGlueW1jZS1nZW5lcmF0ZWQtcm9vdC1ibG9jayIgc3R5bGU9Im1hcmdpbjogMHB4OyBwYWRkaW5nOiAwcHg7Ij4KPGRpdiBzdHlsZT0iZmxvYXQ6IHJpZ2h0OyI+PGltZyBhbHQ9IkZvdG8gUmVpbmRsIEhhcmFsZCIgc3JjPSIvcnRlL3VwbG9hZC9mb3RvLmpwZyIgc3R5bGU9IndpZHRoOiAyNTBweDsgaGVpZ2h0OiAyNDlweDsiPjwvZGl2Pgo8ZGl2IHN0eWxlPSJ3aGl0ZS1zcGFjZTogbm93cmFwOyI+CjxwIHN0eWxlPSJtYXJnaW46IDBweDsiPlJlaW5kbCBIYXJhbGQmdXVtbDs8YnI+IFNpZWdmcmllZGdhc3NlIDIyLTI0LzIvNDxicj4gMTIxMCBXaWVuPGJyPiAoKzQzKSAwNjc2IDQwIDIyMSA0MDxicj4gW2JlZm9yZWxpbmtzXTxhIGhyZWY9Ii9zaG93X2NvbnRlbnQucGhwP3NpZD0xMDciPnN1cHBvcnRAcmhzb2Z0Lm5ldDwvYT48YnI+IFtiZWZvcmVsaW5rc108YSBocmVmPSIvcmVpbmRsLWhhcmFsZC52Y2YiPlYtQ2FyZCBoZXJ1bnRlcmxhZGVuPC9hPjxicj4gW2JlZm9yZWxpbmtzXTxhIHRhcmdldD0iX2JsYW5rIiBocmVmPSIvZ3BnL3N1cHBvcnRfcmhzb2Z0Lm5ldC5wdWIudHh0Ij5NYWlsIEdQRy1LZXk8L2E+PC9wPgo8L2Rpdj4KPGRpdiBzdHlsZT0iZmxvYXQ6IG5vbmU7IHBhZGRpbmctdG9wOiAxNXB4OyI+Cjxocj4KPGI+R2VidXJ0c2RhdHVtOiA8L2I+MTIuIE5vdmVtYmVyIDE5NzcsIE1pc3RlbGJhY2gsIE4mT3VtbDsKPGg0PkJFUlVGU0VSRkFIUlVORzwvaDQ+CjxoNT4xOTkzIC0gMjAwMCBJbmcuIEdpbmRsIEdtYkggMjEyMCBXb2xrZXJzZG9yZjwvaDU+Cjx1bD4KPGxpPk1vbnRhZ2UgdW5kIFdhcnR1bmcgdm9uICZvdW1sO2ZmZW50bGljaGVuIEJlbGV1Y2h0dW5nZW48L2xpPgo8bGk+V2FydHVuZyB1bmQgRXJyaWNodHVuZyB2b24gU2lnbmFsYW5sYWdlbjwvbGk+CjxsaT5Xb2huYmF1LUluc3RhbGxhdGlvbmVuPC9saT4KPGxpPkluZnJhc3RydWt0dXJhdXNiYXUgaW4gWnVzYW1tZW5hcmJlaXQgbWl0IGRlciBFVk4gTmllZGVyJm91bWw7c3RlcnJlaWNoPC9saT4KPC91bD4KPGRpdiBjbGFzcz0idGlueW1jZS1nZW5lcmF0ZWQtcm9vdC1ibG9jayIgc3R5bGU9Im1hcmdpbjogMHB4OyBwYWRkaW5nOiAwcHg7Ij4KPGg1PjIwMDEgLSAyMDAyIE4mT3VtbDsgVm9sa3NoaWxmZSAyMTMwIE1pc3RlbGJhY2g8L2g1Pgo8L2Rpdj4KPHVsPgo8bGk+RWxla3Ryb2luc3RhbGxhdGlvbiwgVGVjaG5pc2NoZXIgU3VwcG9ydDwvbGk+CjwvdWw+CjxoNT4yMDAxIC0gMjAwNyBFaW56ZWx1bnRlcm5laG1lcjwvaDU+Cjx1bD4KPGxpPkVudHdpY2tsdW5nIHZvbiBEYXRlbmJhbmstTCZvdW1sO3N1bmdlbiwgSW50ZXJuZXQvSW50cmFuZXQtQXBwbGlrYXRpb25lbjwvbGk+CjxsaT5OZXR6d2Vyay0gdW5kIFNlcnZlcnRlY2huaWs8L2xpPgo8bGk+Q29udGVudC1NYW5hZ21lbnQtU3lzdGVtZTwvbGk+CjxsaT5FLUJ1c2luZXNzPC9saT4KPC91bD4KPGg1PnNlaXQgMjAwODwvaDU+
Cjx1bD4KPGxpPjxhIGhyZWY9Imh0dHA6Ly93d3cudGhlbG91bmdlLm5ldC8iIHRhcmdldD0icmh3aW4iIG9uY2xpY2s9InJod2luZm9jdXMoKTsiPnRoZWxvdW5nZSBpbnRlcmFjdGl2ZSBkZXNpZ248L2E+PC9saT4KPGxpPlNvZnR3YXJlLUVudHdpY2tsdW5nPC9saT4KPGxpPlRlY2huaXNjaGUgQWRtaW5pc3RyYXRpb248L2xpPgo8L3VsPgo8aHI+CjxoND5LVVJTRSBVTkQgU0NIVUxVTkdFTjwvaDQ+Cjx1bD4KPGxpPjEwLjA4LiAtIDA5LjEwLjIwMDIgTWljcm9zb2Z0IENlcnRpZmllZCBQcm9mZXNzaW9uYWw8L2xpPgo8bGk+MjkuMDYuIC0gMDkuMDguMjAwMiBXaW5kb3dzIDIwMDAgSW5zdGFsbGF0aW9uIHVuZCBWZXJ3YWx0dW5nPC9saT4KPGxpPjIxLjA1LiAtIDI4LjA2LjIwMDIgV2luZG93cyBOVCA0LjAgVGVjaG5pc2NoZXMgS25vd0hvdyAxLTQ8L2xpPgo8bGk+MjEuMDUuIC0gMjguMDYuMjAwMiBFaW5mJnV1bWw7aHJ1bmcgaW4gTmV0endlcmt0ZWNobm9sb2dpZSAxLTI8L2xpPgo8bGk+MTMuMDIuIC0gMjIuMDUuMjAwMSBFdXJvcCZhdW1sO2lzY2hlciBDb21wdXRlcmYmdXVtbDtocmVyc2NoZWluIChFQ0RMKTwvbGk+CjxsaT4wNS4wMy4gLSAwNi4wNC4yMDAxIEF1cy0gdW5kIFdlaXRlcmJpbGR1bmcgaW0gVmVya2F1ZjwvbGk+CjwvdWw+Cjxocj4KPGg0PkFVU0JJTERVTkc8L2g0Pgo8dWw+CjxsaT4xOTg0IC0gMTk4OCBWb2xrc3NjaHVsZTwvbGk+CjxsaT4xOTg4IC0gMTk5MiBIYXVwdHNjaHVsZTwvbGk+CjxsaT4xOTkyIC0gMTk5MyBQb2x5dGVjaG5pc2NoZXIgTGVocmdhbmcgKE1pdCBBdXN6ZWljaG51bmcpPC9saT4KPGxpPjE5OTMgLSAxOTk3IExhbmRlc2JlcnVmc3NjaHVsZSAoTWl0IEF1c3plaWNobnVuZyk8L2xpPgo8L3VsPgo8aHI+CjxoND5JTlRFUkVTU0VOPC9oND4KPHVsPgo8bGk+TXVzaWssIEtpbm8sIDxhIGhyZWY9Imh0dHA6Ly93d3cua2FyYW9rZS13aWVuLmF0LyIgdGFyZ2V0PSJfYmxhbmsiIHRpdGxlPSJCYWJ1ZGVyJnJzcXVvO3MgfCBNYWNoIGRpZSBXZWx0IHp1IGRlaW5lciBCJnV1bWw7aG5lIHwgS2FyYW9rZSBpbiBXaWVuOiI+S2FyYW9rZTwvYT4KPC9saT4KPGxpPlNwb3J0IHNvZmVybmUgZXMgZGllIEZyZWl6ZWl0IHVuZCBkYXMgV2V0dGVyIHp1bGFzc2VuPC9saT4KPGxpPkVEViB1bmQgSW5mb3JtYXRpb25zdGVjaG5vbG9naWUgYXVjaCBwcml2YXQ8L2xpPgo8bGk+VW50ZXJoYWx0dW5nZWxla3Ryb25payBqZWdsaWNoZXIgQXJ0PC9saT4KPGxpPlNvZnR3YXJlLUVudHdpY2tsdW5nIG1pdCBWaXN1YWwgQmFzaWMgNi4wIHp1ciBwcml2YXRlbiBWZXJ3ZW5kdW5nPC9saT4KPGxpPlRlc3RlbiB2b24gU3lzdGVtZW4gdW5kIE5ldHplcmtlbiBpbSBwcml2YXRlbiB1bmQgZ2VzY2gmYXVtbDtmdGxpY2hlbiBVbWZlbGQ8L2xpPgo8bGk+SW50ZXJuZXQtUHJvZ3JhbW1pZXJ1bmcgKEphdmFTY3JpcHQgLyBDU1MgLyBQSFAgLyBNeVNRTCk8L2xpPgo8bGk+QXVkaW8tIHVuZCBCaWxkYmVhcmJlaXR1bmc8
L2xpPgo8L3VsPgo8aHI+CjxoND5NVVNJSzwvaDQ+Cjx1bD4KPGxpPkhlYXZ5LU1ldGFsLCBSb2NrLCBLdXNjaGVsLVJvY2ssIE9sZGllcywgQXVzdHJvLVBvcDwvbGk+CjxsaT5EZWVwIFB1cnBsZSwgSm9lIEx5bm4gVHVybmVyLCBJYW4gR2lsbGFuLCBEZWYgTGVwcGFyZCwgQWVyb3NtaXRoLCBBQy9EQywgRG9ybywgQiZvdW1sO2hzZSBPbmtlbHosIE1ldGFsbGljYSwgR3VucyBOJmFjdXRlOyBSb3NlcywgSnVkYXMgUHJpZXN0LCBIZWxsb3dlZW4sIEtJU1MsIEFsaWNlIENvb3BlciwgQmxhY2sgU2FiYmF0aCwgTWFub3dhciwgTWFnbnVtLCBTYXZhdGFnZSwgVmljdG9yeSwgTGVkIFplcHBlbGluLCBHbGVubiBIdWdoZXMsIE51IFBhZ2FkaTwvbGk+CjxsaT5Kb2FuYSBaaW1tZXIsIFBldGVyIENldGVyYSwgQnJ5YW4gQWRhbXMsIFRvdG8sIFF1ZWVuLCBSRU0sIEV1cm9wZSwgU2NvcnBpb25zLCBXaGl0ZSBMaW9uLCBNZWF0IExvYWYsIEJvbiBKb3ZpLCBEaXJlIFN0cmFpdHMsIFJveGV0dGUsIENoaWNhZ28sIFNhbnRhbmEsIFN0YXR1cyBRdW8sIFN1cnZpdm9yLCBGb3JlaWduZXIsIEJvc3RvbiwgQm9uZmlyZSwgUmFpbmJvdywgR2VuZXNpcywgUG9pc29uLCBKb2huIE5vcnVtLCBSYWVtb25uPC9saT4KPGxpPlNUUywgSGVyYmVydCBHciZvdW1sO25lbXllciwgT3B1cywgRmFsY28sIFRvdGVuIEhvc2VuLCBXb2xmZ2FuZyBBbWJyb3MsIEEzLCBQZXRlciBNYWZmYXksIEtsYXVzIExhZ2UsIFB1ciwgUGV0ZXIgQ29ybmVsaXVzLCBIYW5zaSBEdWptaWM8L2xpPgo8L3VsPgo8aHI+CjxoND5TQ0hBVVNQSUVMRVIgLyBGSUxNRSAvIFNFUklFTjwvaDQ+Cjx1bD4KPGxpPkplYW4gQ2xhdWRlIFZhbiBEYW1tZSwgU3lsdmVzdGVyIFN0YWxsb25lLCBTdGV2ZW4gU2VhZ2FsLCBMb3JlbnpvIExhbWFzLCBOaWNvbGFzIENhZ2UsIFJpY2hhcmQgRGVhbiBBbmRlcnNvbiwgRGVuemVsIFdhc2hpbmd0b24sIENocmlzdG9waGVyIExhbWJlcnQsIE1lZyBSeWFuLCBTYXJhaCBNaWNoZWxsZSBHZWxsYXIsIEFtYW5kYSBUYXBwaW5nIC4uLi48L2xpPgo8bGk+SyZvdW1sO25pZ3JlaWNoIGRlciBIaW1tZWwsIEhlcnIgZGVyIFJpbmdlLCBIaWdobGFuZGVyLCBDb24gQWlyLCBUaGUgUm9jaywgU3RhcmdhdGUsIEluZGVwZW5kZW5jZSBEYXksIFp1bSB0Jm91bWw7dGVuIGZyZWlnZWdlYmVuLCBIYXJkIFRvIEtpbGwsIFRoZSBQYXRyaW90LCBSYW1ibywgUm9ja3ksIEhhcnRlIFppZWxlLCBUaW1lY29wLCBCZXN0IE9mIFRoZSBCZXN0LCBCcnVjZSBMZWUgU3RvcnkgLi4uLjwvbGk+CjxsaT5TdGFyZ2F0ZSwgQW5kcm9tZWRhLCBIaWdod2F5IFRvIEhlbGwsIFJlbmVnYWRlLCBOaWtpdGEsIFByb2ZpbGVyLCBDU0ksIERhcmsgQW5nZWwsIEJ1ZmZ5LCBDaGFybWVkPC9saT4KPC91bD4KPC9kaXY+CjwvZGl2Pgo=');
// Run the decoded fixture through the helper's load pipeline and report
// whether it comes back byte-for-byte unchanged.
$helper = new rh_rte_helper_debug();
$after_load = $helper->on_load($test_string);

var_dump($after_load === $test_string);
14
/**
 * Round-trips an HTML fragment through DOMDocument: strips "garbage" wrapper
 * blocks, makes sure the content sits inside a root-block <div>, and returns
 * the fragment again without the temporary <html>/<body> scaffolding.
 */
final class rh_rte_helper_debug
{
    /** @var array libxml errors captured by get_dom() when a parse fails */
    public $errors = [];

    /**
     * Runs the full load pipeline on a fragment.
     *
     * The input is passed through utf8_encode(), processed as a complete HTML
     * document, then converted to HTML entities and passed through
     * utf8_decode() before being returned.
     *
     * @param string $content HTML fragment to process
     * @return string the processed fragment
     */
    public function on_load(string $content): string
    {
        $content = utf8_encode($content);
        $content = $this->add_outer_html($content);
        $content = $this->remove_garbage($content);
        if (!$this->has_root_block($content))
        {
            $content = $this->add_root_block($content);
        }
        $content = $this->remove_outer_html($content);
        $content = mb_convert_encoding($content, 'html-entities', 'UTF-8');
        return utf8_decode($content);
    }

    /**
     * Tells whether the document contains at least one root-block element.
     *
     * @param string $content complete HTML document
     * @return bool false also when the document cannot be parsed or queried
     */
    private function has_root_block(string $content): bool
    {
        $doc = $this->get_dom($content);
        if ($doc === null)
        {
            return false;
        }
        $nodes = $this->find_by_class($doc, 'tinymce-generated-root-block');
        return $nodes !== null && $nodes->length > 0;
    }

    /**
     * Wraps all children of <body> in a new root-block <div>, unless a
     * root block already exists.
     *
     * @param string $content complete HTML document
     * @return string the re-serialized document, or $content unchanged when
     *                nothing was modified
     */
    private function add_root_block(string $content): string
    {
        $doc = $this->get_dom($content);
        if ($doc === null)
        {
            return $content;
        }
        $nodes = $this->find_by_class($doc, 'tinymce-generated-root-block');
        if ($nodes !== null && $nodes->length > 0)
        {
            // A root block is already present; leave the document alone.
            return $content;
        }
        $body = $doc->getElementsByTagName('body')->item(0);
        if ($body === null)
        {
            return $content;
        }
        $root_div = $doc->createElement('div');
        $root_div->setAttribute('class', 'tinymce-generated-root-block');
        $root_div->setAttribute('style', 'margin: 0px; padding: 0px;');
        // Appending a child elsewhere detaches it from <body>, so the first
        // child is taken repeatedly until <body> is empty.
        while ($body->childNodes->length > 0)
        {
            $root_div->appendChild($body->childNodes->item(0));
        }
        $body->appendChild($root_div);
        return $this->save_document($doc, $content);
    }

    /**
     * Embeds a fragment in a minimal HTML 4.01 document.
     *
     * @param string $content HTML fragment
     * @return string complete HTML document with $content as the body
     */
    private function add_outer_html(string $content): string
    {
        return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html><head><title>Tidy</title></head><body>' . $content . '</body></html>';
    }

    /**
     * Unwraps every root-block element, keeping its children.
     *
     * @param string $content complete HTML document
     * @return string the document without root-block wrappers
     */
    private function remove_root_block(string $content): string
    {
        return $this->remove_block($content, 'tinymce-generated-root-block');
    }

    /**
     * Unwraps every garbage-block element, keeping its children.
     *
     * @param string $content complete HTML document
     * @return string the document without garbage-block wrappers
     */
    private function remove_garbage(string $content): string
    {
        return $this->remove_block($content, 'tinymce-garbage-root-block');
    }

    /**
     * Parses HTML into a DOMDocument with libxml errors collected internally.
     *
     * On failure the libxml errors are stored in $this->errors. The caller's
     * libxml_use_internal_errors() setting is restored before returning.
     *
     * @param string $html           markup to parse
     * @param bool   $add_outer_html when false, LIBXML_HTML_NOIMPLIED is passed
     *                               to the parser
     * @return DOMDocument|null the parsed document, or null when loading failed
     */
    private function get_dom(string $html, bool $add_outer_html = true): ?DOMDocument
    {
        $use_internal_errors      = libxml_use_internal_errors(true);
        $dom                      = new DOMDocument;
        $dom->resolveExternals    = false;
        $dom->preserveWhiteSpace  = true;
        $dom->strictErrorChecking = false;
        $dom->formatOutput        = true;
        $dom->recover             = true;
        $dom->validateOnParse     = true;
        $dom->substituteEntities  = false;
        $html                     = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');

        $options = LIBXML_HTML_NODEFDTD;
        if (!$add_outer_html)
        {
            $options |= LIBXML_HTML_NOIMPLIED;
        }

        // The leading processing instruction is prepended only for parsing
        // and is stripped from the document again below.
        $loaded = @$dom->loadHTML('<?xml encoding="UTF-8">' . $html, $options);
        if (!$loaded)
        {
            $this->errors = libxml_get_errors();
            $dom = null;
        }
        else
        {
            // Iterate a snapshot: removing nodes while walking the live
            // childNodes list can end the traversal early.
            foreach (iterator_to_array($dom->childNodes) as $item)
            {
                if ($item->nodeType === XML_PI_NODE)
                {
                    $dom->removeChild($item);
                }
            }
            $dom->encoding = 'UTF-8';
        }
        libxml_clear_errors();
        libxml_use_internal_errors($use_internal_errors);
        return $dom;
    }

    /**
     * Strips doctype, <html> and <body> and returns the serialized body
     * children.
     *
     * @param string $content complete HTML document
     * @return string concatenated markup of the former <body> children; the
     *                whole serialized document when no <body> exists;
     *                $content unchanged when parsing fails
     */
    private function remove_outer_html(string $content): string
    {
        $doc = $this->get_dom($content);
        if ($doc === null)
        {
            return $content;
        }

        $doctype = $doc->doctype;
        if ($doctype !== null)
        {
            $doctype->parentNode->removeChild($doctype);
        }

        // Replace <html> by its own children.
        $html = $doc->getElementsByTagName('html')->item(0);
        if ($html !== null)
        {
            $fragment = $doc->createDocumentFragment();
            while ($html->childNodes->length > 0)
            {
                $fragment->appendChild($html->childNodes->item(0));
            }
            $html->parentNode->replaceChild($fragment, $html);
        }

        $body = $doc->getElementsByTagName('body')->item(0);
        if ($body === null)
        {
            return $this->save_document($doc, $content);
        }

        // Each child is moved into the fragment first and serialized
        // afterwards; this order is kept deliberately.
        $return = '';
        $fragment = $doc->createDocumentFragment();
        while ($body->childNodes->length > 0)
        {
            $childNode = $body->childNodes->item(0);
            $fragment->appendChild($childNode);
            $return .= $doc->saveHTML($childNode);
        }
        $body->parentNode->replaceChild($fragment, $body);
        return $return;
    }

    /**
     * Replaces every element matching $class by its children.
     *
     * @param string $content complete HTML document
     * @param string $class   class name to look for
     * @return string the re-serialized document, or $content unchanged when
     *                nothing matched or parsing failed
     */
    private function remove_block(string $content, string $class = 'tinymce-generated-root-block'): string
    {
        $doc = $this->get_dom($content);
        if ($doc === null)
        {
            return $content;
        }
        $nodes = $this->find_by_class($doc, $class);
        if ($nodes === null || $nodes->length === 0)
        {
            return $content;
        }
        foreach ($nodes as $node)
        {
            $fragment = $doc->createDocumentFragment();
            while ($node->childNodes->length > 0)
            {
                $childNode = $node->childNodes->item(0);
                if ($childNode->nodeType === XML_TEXT_NODE)
                {
                    // Text is copied into a fresh text node and the original
                    // one is dropped instead of being moved.
                    $fragment->appendChild($doc->createTextNode($childNode->nodeValue));
                    $childNode->parentNode->removeChild($childNode);
                }
                else
                {
                    $fragment->appendChild($childNode);
                }
            }
            $node->parentNode->replaceChild($fragment, $node);
        }
        return $this->save_document($doc, $content);
    }

    /**
     * Finds all elements whose class attribute contains $class.
     *
     * NOTE(review): the needle is not space-delimited, so a class attribute
     * that merely contains $class as a substring matches as well; confirm
     * whether that is intended. $class is inserted into the expression
     * unescaped; every call in this class passes a string literal.
     *
     * @param DOMDocument $doc   document to search
     * @param string      $class class name to look for
     * @return DOMNodeList|null the matches, or null when the query fails
     */
    private function find_by_class(DOMDocument $doc, string $class): ?DOMNodeList
    {
        $xpath = new DOMXPath($doc);
        $path = '//*[contains(concat(" ", normalize-space(@class), " "), "'.$class.'")]';
        $nodes = $xpath->query($path);
        return ($nodes === false) ? null : $nodes;
    }

    /**
     * Serializes the whole document, falling back to $fallback when
     * saveHTML() reports failure by returning false.
     *
     * @param DOMDocument $doc      document to serialize
     * @param string      $fallback value returned when serialization fails
     * @return string
     */
    private function save_document(DOMDocument $doc, string $fallback): string
    {
        $html = $doc->saveHTML();
        return ($html === false) ? $fallback : $html;
    }
}
236--EXPECT--
237bool(true)
238