xref: /PHP-8.4/ext/tidy/tidy.c (revision ad452086)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Author: John Coggeshall <john@php.net>                               |
14   +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20 
21 #include "php.h"
22 #include "php_tidy.h"
23 
24 #ifdef HAVE_TIDY
25 
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28 
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34 
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40 
41 #include "tidy_arginfo.h"
42 
43 #include "Zend/zend_exceptions.h"
44 
/* compatibility with older versions of libtidy */
#ifndef TIDY_CALL
#define TIDY_CALL
#endif

/* {{{ ext/tidy macros */
/* Overwrite the last byte of a non-empty TidyBuffer with a NUL terminator; a zero-sized buffer is left untouched. */
#define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)

/* Declare a local `object` holding the zval returned by getThis(). */
#define TIDY_SET_CONTEXT \
    zval *object = getThis();

/*
 * Declare locals `obj` and `object`, parse a single "O" argument of class tidy_ce_doc
 * (via zend_parse_method_parameters with getThis()), and bail out with RETURN_THROWS()
 * when parsing fails. On success `obj` points at the PHPTidyObj behind `object`.
 */
#define TIDY_FETCH_OBJECT	\
	PHPTidyObj *obj;	\
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) != SUCCESS) {	\
		RETURN_THROWS();	\
	}	\
	obj = Z_TIDY_P(object);	\

/* Same as TIDY_FETCH_OBJECT, but additionally throws an Error and returns when the document's `initialized` flag is not set. */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}

/* Declare `obj`/`object` from getThis() for calls that accept no arguments; RETURN_THROWS() if any argument was passed. */
#define TIDY_FETCH_ONLY_OBJECT	\
	PHPTidyObj *obj;	\
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() != SUCCESS) {	\
		RETURN_THROWS();	\
	}	\
	obj = Z_TIDY_P(object);	\

/* Load the file named by the tidy.default_config INI value into _doc when that value is a non-empty string. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		php_tidy_load_config(_doc, TG(default_config)); \
	}
/* }}} */
84 
/* {{{ ext/tidy structs */
typedef struct _PHPTidyDoc PHPTidyDoc;
typedef struct _PHPTidyObj PHPTidyObj;

/* Discriminates what a PHPTidyObj wraps: a single node or a whole document. */
typedef enum {
	is_node,
	is_doc
} tidy_obj_type;

/* Selects which well-known node php_tidy_create_node() looks up. */
typedef enum {
	is_root_node,
	is_html_node,
	is_head_node,
	is_body_node
} tidy_base_nodetypes;

/* Document state that document and node objects point to and release by reference count. */
struct _PHPTidyDoc {
	TidyDoc			doc;            /* libtidy document handle */
	TidyBuffer		*errbuf;        /* buffer registered with tidySetErrorBuffer() */
	unsigned int	ref_count;      /* number of PHPTidyObj instances pointing at this struct */
	unsigned int    initialized:1;  /* set once php_tidy_parse_string() has been attempted */
};

/* Per-object storage; `std` stays last so the wrapper can be recovered from a zend_object via XtOffsetOf(). */
struct _PHPTidyObj {
	TidyNode		node;   /* node handle (assigned for node objects) */
	tidy_obj_type	type;
	PHPTidyDoc		*ptdoc; /* document state; checked for NULL before release */
	zend_object		std;
};
114 
php_tidy_fetch_object(zend_object * obj)115 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
116 	return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
117 }
118 
119 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
120 /* }}} */
121 
/* {{{ ext/tidy prototypes */
/* Forward declarations for file-local helpers that are referenced before their definitions. */
static zend_string *php_tidy_file_to_mem(const char *, bool);
static void tidy_object_free_storage(zend_object *);
static zend_object *tidy_object_new_node(zend_class_entry *);
static zend_object *tidy_object_new_doc(zend_class_entry *);
static zval *tidy_instantiate(zend_class_entry *, zval *);
static zend_result tidy_doc_cast_handler(zend_object *, zval *, int);
static zend_result tidy_node_cast_handler(zend_object *, zval *, int);
static void tidy_doc_update_properties(PHPTidyObj *);
static void tidy_add_node_default_properties(PHPTidyObj *);
static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
static int _php_tidy_set_tidy_opt(TidyDoc, const char *, zval *);
static int _php_tidy_apply_config_array(TidyDoc doc, const HashTable *ht_options);
static PHP_INI_MH(php_tidy_set_clean_output);
static void php_tidy_clean_output_start(const char *name, size_t name_len);
static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context);

/* Module lifecycle callbacks wired into tidy_module_entry. */
static PHP_MINIT_FUNCTION(tidy);
static PHP_MSHUTDOWN_FUNCTION(tidy);
static PHP_RINIT_FUNCTION(tidy);
static PHP_RSHUTDOWN_FUNCTION(tidy);
static PHP_MINFO_FUNCTION(tidy);
146 
ZEND_DECLARE_MODULE_GLOBALS(tidy)

/*
 * INI settings:
 *  - tidy.default_config: PHP_INI_SYSTEM string stored in the default_config global.
 *  - tidy.clean_output:   PHP_INI_USER boolean stored in the clean_output global; changes go
 *                         through php_tidy_set_clean_output.
 */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
STD_PHP_INI_BOOLEAN("tidy.clean_output",	"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
PHP_INI_END()

/* Class entries for the document and node classes; assigned in MINIT. */
static zend_class_entry *tidy_ce_doc, *tidy_ce_node;

/* Object handler tables; filled in from std_object_handlers in MINIT. */
static zend_object_handlers tidy_object_handlers_doc;
static zend_object_handlers tidy_object_handlers_node;
158 
/* Module descriptor: name, function table, lifecycle callbacks, version and module globals. */
zend_module_entry tidy_module_entry = {
	STANDARD_MODULE_HEADER,
	"tidy",
	ext_functions,
	PHP_MINIT(tidy),
	PHP_MSHUTDOWN(tidy),
	PHP_RINIT(tidy),
	PHP_RSHUTDOWN(tidy),
	PHP_MINFO(tidy),
	PHP_TIDY_VERSION,
	PHP_MODULE_GLOBALS(tidy),
	/* NOTE(review): the three NULLs are presumably the globals ctor, globals dtor and post-deactivate slots - confirm against zend_module_entry */
	NULL,
	NULL,
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};

/* Only emitted when COMPILE_DL_TIDY is defined. */
#ifdef COMPILE_DL_TIDY
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(tidy)
#endif
182 
183 static void* TIDY_CALL php_tidy_malloc(size_t len)
184 {
185 	return emalloc(len);
186 }
187 
php_tidy_realloc(void * buf,size_t len)188 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
189 {
190 	return erealloc(buf, len);
191 }
192 
php_tidy_free(void * buf)193 static void TIDY_CALL php_tidy_free(void *buf)
194 {
195 	efree(buf);
196 }
197 
php_tidy_panic(ctmbstr msg)198 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
199 {
200 	php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (const char *)msg);
201 }
202 
/*
 * Load a Tidy configuration file into doc.
 * A negative tidyLoadConfig() result raises an E_WARNING, a positive one an E_NOTICE;
 * zero is silent.
 */
static void php_tidy_load_config(TidyDoc doc, const char *path)
{
	const int status = tidyLoadConfig(doc, path);

	if (status == 0) {
		return;
	}

	if (status < 0) {
		php_error_docref(NULL, E_WARNING, "Could not load the Tidy configuration file \"%s\"", path);
		return;
	}

	php_error_docref(NULL, E_NOTICE, "There were errors while parsing the Tidy configuration file \"%s\"", path);
}
212 
php_tidy_apply_config(TidyDoc doc,const zend_string * str_string,const HashTable * ht_options)213 static zend_result php_tidy_apply_config(TidyDoc doc, const zend_string *str_string, const HashTable *ht_options)
214 {
215 	if (ht_options) {
216 		return _php_tidy_apply_config_array(doc, ht_options);
217 	} else if (str_string) {
218 		if (php_check_open_basedir(ZSTR_VAL(str_string))) {
219 			return FAILURE;
220 		}
221 		php_tidy_load_config(doc, ZSTR_VAL(str_string));
222 	}
223 	return SUCCESS;
224 }
225 
/*
 * Set one Tidy option, looked up by name, from a PHP value.
 * Unknown and read-only options raise an E_WARNING. The value is converted to a string
 * or an integer according to the option's type before being handed to libtidy.
 * Returns SUCCESS when libtidy accepts the value, FAILURE otherwise.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, const char *optname, zval *value)
{
	TidyOption opt = tidyGetOptionByName(doc, optname);
	bool accepted = false;

	if (!opt) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(opt)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(opt)) {
		case TidyString: {
			zend_string *tmp_str;
			zend_string *text = zval_get_tmp_string(value, &tmp_str);

			if (tidyOptSetValue(doc, tidyOptGetId(opt), ZSTR_VAL(text))) {
				accepted = true;
			}
			/* The temporary string is released whether or not libtidy took the value. */
			zend_tmp_string_release(tmp_str);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(opt), number)) {
				accepted = true;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(opt), flag)) {
				accepted = true;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return accepted ? SUCCESS : FAILURE;
}
273 
/*
 * Instantiate a node object in zv for the given node, point it at ptdoc (incrementing the
 * document's reference count) and populate its default properties.
 */
static void tidy_create_node_object(zval *zv, PHPTidyDoc *ptdoc, TidyNode node)
{
	PHPTidyObj *wrapper;

	tidy_instantiate(tidy_ce_node, zv);
	wrapper = Z_TIDY_P(zv);

	wrapper->node = node;
	wrapper->type = is_node;
	wrapper->ptdoc = ptdoc;
	ptdoc->ref_count++;

	tidy_add_node_default_properties(wrapper);
}
284 
/*
 * Shared implementation of tidy_repair_string() / tidy_repair_file().
 * Parses the arguments (the file variant additionally accepts use_include_path and reads the
 * file into memory), runs parse + clean-and-repair on a throw-away Tidy document and sets the
 * return value to the saved markup, or FALSE on failure.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, bool is_file)
{
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	TidyBuffer input;

	if (!is_file) {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		data = arg1;
	} else {
		bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path);
		if (data == NULL) {
			RETURN_FALSE;
		}
	}

	/* The length is later narrowed to uint32_t for tidyBufAttach(), so reject anything larger. */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (!is_file) {
			zend_argument_value_error(1, "is too long");
		} else {
			/* File contents are owned by this function; the string argument is not. */
			zend_string_release_ex(data, false);
			zend_argument_value_error(1, "File content is too long");
		}
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/*
	 * NOTE(review): a FALSE stored here does not stop processing; the parse step below
	 * assigns the return value again - confirm this is intended.
	 */
	if (php_tidy_apply_config(doc, config_str, config_ht) != SUCCESS) {
		RETVAL_FALSE;
	} else if (enc_len && tidySetCharEncoding(doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		RETVAL_FALSE;
	}

	/* `data` is always non-NULL here: the file branch returned early when loading failed. */
	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

	if (tidyParseBuffer(doc, &input) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
		RETVAL_FALSE;
	} else if (tidyCleanAndRepair(doc) < 0) {
		RETVAL_FALSE;
	} else {
		TidyBuffer output;

		tidyBufInit(&output);
		tidySaveBuffer (doc, &output);
		FIX_BUFFER(&output);
		/* The last byte of the saved buffer is not part of the returned string. */
		RETVAL_STRINGL((const char *) output.bp, output.size ? output.size-1 : 0);
		tidyBufFree(&output);
	}

	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
386 
php_tidy_file_to_mem(const char * filename,bool use_include_path)387 static zend_string *php_tidy_file_to_mem(const char *filename, bool use_include_path)
388 {
389 	php_stream *stream;
390 	zend_string *data = NULL;
391 
392 	if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
393 		return NULL;
394 	}
395 	if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
396 		data = ZSTR_EMPTY_ALLOC();
397 	}
398 	php_stream_close(stream);
399 
400 	return data;
401 }
402 
/*
 * free_obj handler for both tidy classes: destroys the standard object part, then drops one
 * reference on the document state and releases it when the count reaches zero.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&intern->std);

	shared = intern->ptdoc;
	if (shared == NULL) {
		return;
	}

	shared->ref_count--;
	if (shared->ref_count != 0) {
		/* Other objects still reference the document. */
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
420 
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)421 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
422 {
423 	PHPTidyObj *intern;
424 
425 	intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
426 	zend_object_std_init(&intern->std, class_type);
427 	object_properties_init(&intern->std, class_type);
428 
429 	switch(objtype) {
430 		case is_node:
431 			break;
432 
433 		case is_doc:
434 			intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
435 			intern->ptdoc->doc = tidyCreate();
436 			intern->ptdoc->ref_count = 1;
437 			intern->ptdoc->initialized = 0;
438 			intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
439 			tidyBufInit(intern->ptdoc->errbuf);
440 
441 			if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
442 				tidyBufFree(intern->ptdoc->errbuf);
443 				efree(intern->ptdoc->errbuf);
444 				tidyRelease(intern->ptdoc->doc);
445 				efree(intern->ptdoc);
446 				efree(intern);
447 				php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
448 			}
449 
450 			tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
451 			tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
452 
453 			TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
454 			break;
455 	}
456 
457 	intern->std.handlers = handlers;
458 
459 	return &intern->std;
460 }
461 
tidy_object_new_node(zend_class_entry * class_type)462 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
463 {
464 	return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
465 }
466 
tidy_object_new_doc(zend_class_entry * class_type)467 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
468 {
469 	return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
470 }
471 
/* Initialise `object` as a new instance of pce and hand the same zval back to the caller. */
static zval *tidy_instantiate(zend_class_entry *pce, zval *object)
{
	zval *target = object;

	object_init_ex(target, pce);
	return target;
}
477 
tidy_doc_cast_handler(zend_object * in,zval * out,int type)478 static zend_result tidy_doc_cast_handler(zend_object *in, zval *out, int type)
479 {
480 	TidyBuffer output;
481 	PHPTidyObj *obj;
482 
483 	switch (type) {
484 		case IS_LONG:
485 		case _IS_NUMBER:
486 			ZVAL_LONG(out, 0);
487 			break;
488 
489 		case IS_DOUBLE:
490 			ZVAL_DOUBLE(out, 0);
491 			break;
492 
493 		case _IS_BOOL:
494 			ZVAL_TRUE(out);
495 			break;
496 
497 		case IS_STRING:
498 			obj = php_tidy_fetch_object(in);
499 			tidyBufInit(&output);
500 			tidySaveBuffer (obj->ptdoc->doc, &output);
501 			if (output.size) {
502 				ZVAL_STRINGL(out, (const char *) output.bp, output.size-1);
503 			} else {
504 				ZVAL_EMPTY_STRING(out);
505 			}
506 			tidyBufFree(&output);
507 			break;
508 
509 		default:
510 			return FAILURE;
511 	}
512 
513 	return SUCCESS;
514 }
515 
tidy_node_cast_handler(zend_object * in,zval * out,int type)516 static zend_result tidy_node_cast_handler(zend_object *in, zval *out, int type)
517 {
518 	TidyBuffer buf;
519 	PHPTidyObj *obj;
520 
521 	switch(type) {
522 		case IS_LONG:
523 		case _IS_NUMBER:
524 			ZVAL_LONG(out, 0);
525 			break;
526 
527 		case IS_DOUBLE:
528 			ZVAL_DOUBLE(out, 0);
529 			break;
530 
531 		case _IS_BOOL:
532 			ZVAL_TRUE(out);
533 			break;
534 
535 		case IS_STRING:
536 			obj = php_tidy_fetch_object(in);
537 			tidyBufInit(&buf);
538 			if (obj->ptdoc) {
539 				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
540 				ZVAL_STRINGL(out, (const char *) buf.bp, buf.size-1);
541 			} else {
542 				ZVAL_EMPTY_STRING(out);
543 			}
544 			tidyBufFree(&buf);
545 			break;
546 
547 		default:
548 			return FAILURE;
549 	}
550 
551 	return SUCCESS;
552 }
553 
/*
 * Refresh the document object's "value" and "errorBuffer" properties from the current saved
 * markup and the error buffer. Each property is only written when its buffer is non-empty,
 * and the final byte of the buffer is left out.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer markup;
	TidyBuffer *errors;

	tidyBufInit(&markup);
	tidySaveBuffer (obj->ptdoc->doc, &markup);
	if (markup.size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, ZEND_STRL("value"), (char*) markup.bp, markup.size-1);
	}
	tidyBufFree(&markup);

	errors = obj->ptdoc->errbuf;
	if (errors->size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, ZEND_STRL("errorBuffer"), (char*) errors->bp, errors->size-1);
	}
}
585 
/*
 * Populate a node object's properties from its Tidy node: value, name, type, line, column,
 * proprietary, id, attribute and child.
 * "attribute" and "child" are arrays when the node has attributes / children and NULL
 * otherwise. Building "child" creates a node object for every direct child, each of which
 * is populated through this same function.
 */
static void tidy_add_node_default_properties(PHPTidyObj *obj)
{
	zend_object *self = &obj->std;
	TidyBuffer text;
	TidyAttr attr;
	TidyNode kid;
	zval attribute, children, kid_zv;
	const char *node_name;

	/* value: node text without its final byte, or "" when libtidy produced nothing */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
	if (text.size) {
		zend_update_property_stringl(tidy_ce_node, self, ZEND_STRL("value"), (const char *) text.bp, text.size - 1);
	} else {
		zend_update_property_stringl(tidy_ce_node, self, ZEND_STRL("value"), "", 0);
	}
	tidyBufFree(&text);

	node_name = (const char *) tidyNodeGetName(obj->node);
	zend_update_property_string(tidy_ce_node, self, ZEND_STRL("name"), node_name ? node_name : "");

	zend_update_property_long(tidy_ce_node, self, ZEND_STRL("type"), tidyNodeGetType(obj->node));
	zend_update_property_long(tidy_ce_node, self, ZEND_STRL("line"), tidyNodeLine(obj->node));
	zend_update_property_long(tidy_ce_node, self, ZEND_STRL("column"), tidyNodeColumn(obj->node));
	zend_update_property_bool(tidy_ce_node, self, ZEND_STRL("proprietary"), tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	/* id: NULL for root, doctype, text and comment nodes; the node id for everything else */
	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			zend_update_property_null(tidy_ce_node, self, ZEND_STRL("id"));
			break;

		default:
			zend_update_property_long(tidy_ce_node, self, ZEND_STRL("id"), tidyNodeGetId(obj->node));
			break;
	}

	/* attribute: name => value map; attributes without a name are skipped, missing values become "" */
	attr = tidyAttrFirst(obj->node);
	if (!attr) {
		ZVAL_NULL(&attribute);
	} else {
		array_init(&attribute);

		for (; attr; attr = tidyAttrNext(attr)) {
			const char *attr_name = (const char *)tidyAttrName(attr);
			const char *attr_value = (const char *)tidyAttrValue(attr);

			if (!attr_name) {
				continue;
			}

			if (attr_value) {
				add_assoc_string(&attribute, attr_name, attr_value);
			} else {
				add_assoc_str(&attribute, attr_name, zend_empty_string);
			}
		}
	}

	zend_update_property(tidy_ce_node, self, ZEND_STRL("attribute"), &attribute);
	zval_ptr_dtor(&attribute);

	/* child: list of node objects, one per direct child */
	kid = tidyGetChild(obj->node);
	if (!kid) {
		ZVAL_NULL(&children);
	} else {
		array_init(&children);

		for (; kid; kid = tidyGetNext(kid)) {
			tidy_create_node_object(&kid_zv, obj->ptdoc, kid);
			add_next_index_zval(&children, &kid_zv);
		}
	}

	zend_update_property(tidy_ce_node, self, ZEND_STRL("child"), &children);
	zval_ptr_dtor(&children);
}
727 
/*
 * Read the current value of a Tidy option and store the option's type in *type.
 * String options come back as a newly created zend_string (the empty string when libtidy
 * has no value); integer and boolean options come back as the value cast to a pointer.
 * NULL is returned for any other option type.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	const TidyOptionType kind = tidyOptGetType(opt);

	*type = kind;

	if (kind == TidyString) {
		char *text = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));

		if (!text) {
			return (void *) ZSTR_EMPTY_ALLOC();
		}
		return (void *) zend_string_init(text, strlen(text), 0);
	}

	if (kind == TidyInteger) {
		return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
	}

	if (kind == TidyBoolean) {
		return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
755 
/*
 * Shared implementation for fetching the root/html/head/body node of a document.
 * Sets the return value to a node object, or to NULL when the document has no such node.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	TidyNode node;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_body_node:
			node = tidyGetBody(obj->ptdoc->doc);
			break;

		case is_head_node:
			node = tidyGetHead(obj->ptdoc->doc);
			break;

		case is_html_node:
			node = tidyGetHtml(obj->ptdoc->doc);
			break;

		case is_root_node:
			node = tidyGetRoot(obj->ptdoc->doc);
			break;

		EMPTY_SWITCH_DEFAULT_CASE()
	}

	if (node) {
		tidy_create_node_object(return_value, obj->ptdoc, node);
		return;
	}

	RETURN_NULL();
}
787 
/*
 * Apply every string-keyed entry of ht_options as a Tidy option.
 * Packed arrays and entries without a string key are ignored, and the result of setting an
 * individual option is not checked, so the function always returns SUCCESS.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, const HashTable *ht_options)
{
	zend_string *key;
	zval *entry;

	if (HT_IS_PACKED(ht_options)) {
		return SUCCESS;
	}

	ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(ht_options, key, entry) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), entry);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
803 
/*
 * Parse `len` bytes of markup into obj's document, optionally setting the character
 * encoding first, and refresh the object's properties on success.
 * The document is marked initialized once the encoding step has passed, even if parsing
 * then fails. Returns SUCCESS or FAILURE; failures raise an E_WARNING.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, const char *string, uint32_t len, const char *enc)
{
	PHPTidyDoc *ptdoc = obj->ptdoc;
	TidyBuffer source;

	if (enc != NULL && tidySetCharEncoding(ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	ptdoc->initialized = 1;

	/* The input is attached rather than copied, so the buffer is not freed here. */
	tidyBufInit(&source);
	tidyBufAttach(&source, (byte *) string, len);

	if (tidyParseBuffer(ptdoc->doc, &source) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", ptdoc->errbuf->bp);
		return FAILURE;
	}

	tidy_doc_update_properties(obj);
	return SUCCESS;
}
827 
PHP_MINIT_FUNCTION(tidy)828 static PHP_MINIT_FUNCTION(tidy)
829 {
830 	tidySetMallocCall(php_tidy_malloc);
831 	tidySetReallocCall(php_tidy_realloc);
832 	tidySetFreeCall(php_tidy_free);
833 	tidySetPanicCall(php_tidy_panic);
834 
835 	REGISTER_INI_ENTRIES();
836 
837 	tidy_ce_doc = register_class_tidy();
838 	tidy_ce_doc->create_object = tidy_object_new_doc;
839 	memcpy(&tidy_object_handlers_doc, &std_object_handlers, sizeof(zend_object_handlers));
840 	tidy_object_handlers_doc.clone_obj = NULL;
841 
842 	tidy_ce_node = register_class_tidyNode();
843 	tidy_ce_node->create_object = tidy_object_new_node;
844 	memcpy(&tidy_object_handlers_node, &std_object_handlers, sizeof(zend_object_handlers));
845 	tidy_object_handlers_node.clone_obj = NULL;
846 
847 	tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
848 	tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
849 
850 	tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
851 	tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
852 
853 	register_tidy_symbols(module_number);
854 
855 	php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
856 
857 	return SUCCESS;
858 }
859 
PHP_RINIT_FUNCTION(tidy)860 static PHP_RINIT_FUNCTION(tidy)
861 {
862 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
863 	ZEND_TSRMLS_CACHE_UPDATE();
864 #endif
865 
866 	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
867 
868 	return SUCCESS;
869 }
870 
PHP_RSHUTDOWN_FUNCTION(tidy)871 static PHP_RSHUTDOWN_FUNCTION(tidy)
872 {
873 	TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
874 
875 	return SUCCESS;
876 }
877 
PHP_MSHUTDOWN_FUNCTION(tidy)878 static PHP_MSHUTDOWN_FUNCTION(tidy)
879 {
880 	UNREGISTER_INI_ENTRIES();
881 	return SUCCESS;
882 }
883 
/* phpinfo() section: support status, library version/release (where available) and the INI entries. */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");

	/* Which version row is printed depends on the headers the extension was built against. */
#ifdef HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (const char *)tidyLibraryVersion());
#elif defined(HAVE_TIDYP_H)
	php_info_print_table_row(2, "libtidyp Version", (const char *)tidyVersion());
#endif

#ifdef HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (const char *)tidyReleaseDate());
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
900 
PHP_INI_MH(php_tidy_set_clean_output)901 static PHP_INI_MH(php_tidy_set_clean_output)
902 {
903 	int status;
904 	bool value;
905 
906 	value = zend_ini_parse_bool(new_value);
907 
908 	if (stage == PHP_INI_STAGE_RUNTIME) {
909 		status = php_output_get_status();
910 
911 		if (value && (status & PHP_OUTPUT_WRITTEN)) {
912 			php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
913 			return FAILURE;
914 		}
915 		if (status & PHP_OUTPUT_SENT) {
916 			php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
917 			return FAILURE;
918 		}
919 	}
920 
921 	status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
922 
923 	if (stage == PHP_INI_STAGE_RUNTIME && value) {
924 		if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
925 			php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
926 		}
927 	}
928 
929 	return status;
930 }
931 
932 /*
933  * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
934  */
935 
php_tidy_clean_output_start(const char * name,size_t name_len)936 static void php_tidy_clean_output_start(const char *name, size_t name_len)
937 {
938 	php_output_handler *h;
939 
940 	if (TG(clean_output) && (h = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS))) {
941 		php_output_handler_start(h);
942 	}
943 }
944 
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)945 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
946 {
947 	if (chunk_size) {
948 		php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
949 		return NULL;
950 	}
951 	if (!TG(clean_output)) {
952 		TG(clean_output) = 1;
953 	}
954 	return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
955 }
956 
php_tidy_output_handler(void ** nothing,php_output_context * output_context)957 static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context)
958 {
959 	zend_result status = FAILURE;
960 	TidyDoc doc;
961 	TidyBuffer inbuf, outbuf, errbuf;
962 
963 	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
964 		doc = tidyCreate();
965 		tidyBufInit(&errbuf);
966 
967 		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
968 			tidyOptSetBool(doc, TidyForceOutput, yes);
969 			tidyOptSetBool(doc, TidyMark, no);
970 
971 			if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
972 				php_error_docref(NULL, E_WARNING, "File content is too long");
973 				return status;
974 			}
975 
976 			TIDY_SET_DEFAULT_CONFIG(doc);
977 
978 			tidyBufInit(&inbuf);
979 			tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);
980 
981 			if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
982 				tidyBufInit(&outbuf);
983 				tidySaveBuffer(doc, &outbuf);
984 				FIX_BUFFER(&outbuf);
985 				output_context->out.data = (char *) outbuf.bp;
986 				output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
987 				output_context->out.free = 1;
988 				status = SUCCESS;
989 			}
990 		}
991 
992 		tidyRelease(doc);
993 		tidyBufFree(&errbuf);
994 	}
995 
996 	return status;
997 }
998 
999 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1000 PHP_FUNCTION(tidy_parse_string)
1001 {
1002 	char *enc = NULL;
1003 	size_t enc_len = 0;
1004 	zend_string *input, *options_str = NULL;
1005 	HashTable *options_ht = NULL;
1006 	PHPTidyObj *obj;
1007 
1008 	ZEND_PARSE_PARAMETERS_START(1, 3)
1009 		Z_PARAM_STR(input)
1010 		Z_PARAM_OPTIONAL
1011 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1012 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1013 	ZEND_PARSE_PARAMETERS_END();
1014 
1015 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1016 		zend_argument_value_error(1, "is too long");
1017 		RETURN_THROWS();
1018 	}
1019 
1020 	tidy_instantiate(tidy_ce_doc, return_value);
1021 	obj = Z_TIDY_P(return_value);
1022 
1023 	if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1024 	 || php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) != SUCCESS) {
1025 		zval_ptr_dtor(return_value);
1026 		RETURN_FALSE;
1027 	}
1028 }
1029 /* }}} */
1030 
1031 /* {{{ Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1032 PHP_FUNCTION(tidy_get_error_buffer)
1033 {
1034 	TIDY_FETCH_OBJECT;
1035 
1036 	if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1037 		RETURN_STRINGL((const char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1038 	} else {
1039 		RETURN_FALSE;
1040 	}
1041 }
1042 /* }}} */
1043 
1044 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1045 PHP_FUNCTION(tidy_get_output)
1046 {
1047 	TidyBuffer output;
1048 	TIDY_FETCH_OBJECT;
1049 
1050 	tidyBufInit(&output);
1051 	tidySaveBuffer(obj->ptdoc->doc, &output);
1052 	FIX_BUFFER(&output);
1053 	RETVAL_STRINGL((const char *) output.bp, output.size ? output.size-1 : 0);
1054 	tidyBufFree(&output);
1055 }
1056 /* }}} */
1057 
1058 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1059 PHP_FUNCTION(tidy_parse_file)
1060 {
1061 	char *enc = NULL;
1062 	size_t enc_len = 0;
1063 	bool use_include_path = 0;
1064 	zend_string *inputfile, *contents, *options_str = NULL;
1065 	HashTable *options_ht = NULL;
1066 
1067 	PHPTidyObj *obj;
1068 
1069 	ZEND_PARSE_PARAMETERS_START(1, 4)
1070 		Z_PARAM_PATH_STR(inputfile)
1071 		Z_PARAM_OPTIONAL
1072 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1073 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1074 		Z_PARAM_BOOL(use_include_path)
1075 	ZEND_PARSE_PARAMETERS_END();
1076 
1077 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1078 		php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1079 		RETURN_FALSE;
1080 	}
1081 
1082 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1083 		zend_string_release_ex(contents, 0);
1084 		zend_value_error("File content is too long");
1085 		RETURN_THROWS();
1086 	}
1087 
1088 	tidy_instantiate(tidy_ce_doc, return_value);
1089 	obj = Z_TIDY_P(return_value);
1090 
1091 	if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1092 	 || php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) != SUCCESS) {
1093 		zval_ptr_dtor(return_value);
1094 		RETVAL_FALSE;
1095 	}
1096 
1097 	zend_string_release_ex(contents, 0);
1098 }
1099 /* }}} */
1100 
1101 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1102 PHP_FUNCTION(tidy_clean_repair)
1103 {
1104 	TIDY_FETCH_OBJECT;
1105 
1106 	if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1107 		tidy_doc_update_properties(obj);
1108 		RETURN_TRUE;
1109 	}
1110 
1111 	RETURN_FALSE;
1112 }
1113 /* }}} */
1114 
1115 /* {{{ Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1116 PHP_FUNCTION(tidy_repair_string)
1117 {
1118 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1119 }
1120 /* }}} */
1121 
1122 /* {{{ Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1123 PHP_FUNCTION(tidy_repair_file)
1124 {
1125 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1126 }
1127 /* }}} */
1128 
1129 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1130 PHP_FUNCTION(tidy_diagnose)
1131 {
1132 	TIDY_FETCH_OBJECT;
1133 
1134 	if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1135 		tidy_doc_update_properties(obj);
1136 		RETURN_TRUE;
1137 	}
1138 
1139 	RETURN_FALSE;
1140 }
1141 /* }}} */
1142 
1143 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1144 PHP_FUNCTION(tidy_get_release)
1145 {
1146 	if (zend_parse_parameters_none() != SUCCESS) {
1147 		RETURN_THROWS();
1148 	}
1149 
1150 #ifdef HAVE_TIDYRELEASEDATE
1151 	RETURN_STRING((const char *)tidyReleaseDate());
1152 #else
1153 	RETURN_STRING((const char *)"unknown");
1154 #endif
1155 }
1156 /* }}} */
1157 
1158 
#ifdef HAVE_TIDYOPTGETDOC
/* {{{ Returns the documentation for the given option name.
 *
 * Throws a ValueError when the name does not match a configuration option
 * and returns false when the option has no documentation string. */
PHP_FUNCTION(tidy_get_opt_doc)
{
	zval *object;
	char *name;
	size_t name_len;

	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &name, &name_len) != SUCCESS) {
		RETURN_THROWS();
	}

	PHPTidyObj *obj = Z_TIDY_P(object);
	TidyOption option = tidyGetOptionByName(obj->ptdoc->doc, name);

	if (!option) {
		/* The option name is argument 1 for the method form and
		 * argument 2 for the procedural form. */
		zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", name);
		RETURN_THROWS();
	}

	const char *doc_text = (const char *) tidyOptGetDoc(obj->ptdoc->doc, option);
	if (!doc_text) {
		RETURN_FALSE;
	}

	RETURN_STRING(doc_text);
}
/* }}} */
#endif
1191 
1192 
1193 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1194 PHP_FUNCTION(tidy_get_config)
1195 {
1196 	TidyIterator itOpt;
1197 	const char *opt_name;
1198 	void *opt_value;
1199 	TidyOptionType optt;
1200 
1201 	TIDY_FETCH_OBJECT;
1202 
1203 	itOpt = tidyGetOptionList(obj->ptdoc->doc);
1204 
1205 	array_init(return_value);
1206 
1207 	while (itOpt) {
1208 		TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1209 
1210 		opt_name = (const char *)tidyOptGetName(opt);
1211 		opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1212 		switch (optt) {
1213 			case TidyString:
1214 				add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1215 				break;
1216 
1217 			case TidyInteger:
1218 				add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1219 				break;
1220 
1221 			case TidyBoolean:
1222 				add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1223 				break;
1224 		}
1225 	}
1226 }
1227 /* }}} */
1228 
1229 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1230 PHP_FUNCTION(tidy_get_status)
1231 {
1232 	TIDY_FETCH_OBJECT;
1233 
1234 	RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1235 }
1236 /* }}} */
1237 
1238 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1239 PHP_FUNCTION(tidy_get_html_ver)
1240 {
1241 	TIDY_FETCH_INITIALIZED_OBJECT;
1242 
1243 	RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1244 }
1245 /* }}} */
1246 
1247 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1248 PHP_FUNCTION(tidy_is_xhtml)
1249 {
1250 	TIDY_FETCH_INITIALIZED_OBJECT;
1251 
1252 	RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1253 }
1254 /* }}} */
1255 
1256 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1257 PHP_FUNCTION(tidy_is_xml)
1258 {
1259 	TIDY_FETCH_INITIALIZED_OBJECT;
1260 
1261 	RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1262 }
1263 /* }}} */
1264 
1265 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1266 PHP_FUNCTION(tidy_error_count)
1267 {
1268 	TIDY_FETCH_OBJECT;
1269 
1270 	RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1271 }
1272 /* }}} */
1273 
1274 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1275 PHP_FUNCTION(tidy_warning_count)
1276 {
1277 	TIDY_FETCH_OBJECT;
1278 
1279 	RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1280 }
1281 /* }}} */
1282 
1283 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1284 PHP_FUNCTION(tidy_access_count)
1285 {
1286 	TIDY_FETCH_OBJECT;
1287 
1288 	RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1289 }
1290 /* }}} */
1291 
1292 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1293 PHP_FUNCTION(tidy_config_count)
1294 {
1295 	TIDY_FETCH_OBJECT;
1296 
1297 	RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1298 }
1299 /* }}} */
1300 
1301 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1302 PHP_FUNCTION(tidy_getopt)
1303 {
1304 	PHPTidyObj *obj;
1305 	char *optname;
1306 	void *optval;
1307 	size_t optname_len;
1308 	TidyOption opt;
1309 	TidyOptionType optt;
1310 	zval *object;
1311 
1312 	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) != SUCCESS) {
1313 		RETURN_THROWS();
1314 	}
1315 
1316 	obj = Z_TIDY_P(object);
1317 
1318 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1319 
1320 	if (!opt) {
1321 		zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1322 		RETURN_THROWS();
1323 	}
1324 
1325 	optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1326 	switch (optt) {
1327 		case TidyString:
1328 			RETVAL_STR((zend_string*)optval);
1329 			return;
1330 
1331 		case TidyInteger:
1332 			RETURN_LONG((zend_long)optval);
1333 			break;
1334 
1335 		case TidyBoolean:
1336 			if (optval) {
1337 				RETURN_TRUE;
1338 			} else {
1339 				RETURN_FALSE;
1340 			}
1341 			break;
1342 
1343 		default:
1344 			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1345 			break;
1346 	}
1347 
1348 	RETURN_FALSE;
1349 }
1350 /* }}} */
1351 
PHP_METHOD(tidy,__construct)1352 PHP_METHOD(tidy, __construct)
1353 {
1354 	char *enc = NULL;
1355 	size_t enc_len = 0;
1356 	bool use_include_path = 0;
1357 	HashTable *options_ht = NULL;
1358 	zend_string *contents, *inputfile = NULL, *options_str = NULL;
1359 	PHPTidyObj *obj;
1360 
1361 	ZEND_PARSE_PARAMETERS_START(0, 4)
1362 		Z_PARAM_OPTIONAL
1363 		Z_PARAM_PATH_STR_OR_NULL(inputfile)
1364 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1365 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1366 		Z_PARAM_BOOL(use_include_path)
1367 	ZEND_PARSE_PARAMETERS_END();
1368 
1369 	TIDY_SET_CONTEXT;
1370 	obj = Z_TIDY_P(object);
1371 
1372 	if (inputfile) {
1373 		if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1374 			zend_throw_error(zend_ce_exception, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1375 			RETURN_THROWS();
1376 		}
1377 
1378 		if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1379 			zend_string_release_ex(contents, 0);
1380 			zend_value_error("File content is too long");
1381 			RETURN_THROWS();
1382 		}
1383 
1384 		zend_error_handling error_handling;
1385 		zend_replace_error_handling(EH_THROW, NULL, &error_handling);
1386 		if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS) {
1387 			zend_restore_error_handling(&error_handling);
1388 			zend_string_release_ex(contents, 0);
1389 			RETURN_THROWS();
1390 		}
1391 		zend_restore_error_handling(&error_handling);
1392 
1393 		php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1394 
1395 		zend_string_release_ex(contents, 0);
1396 	}
1397 }
1398 
PHP_METHOD(tidy,parseFile)1399 PHP_METHOD(tidy, parseFile)
1400 {
1401 	char *enc = NULL;
1402 	size_t enc_len = 0;
1403 	bool use_include_path = 0;
1404 	HashTable *options_ht = NULL;
1405 	zend_string *inputfile, *contents, *options_str = NULL;
1406 	PHPTidyObj *obj;
1407 
1408 	ZEND_PARSE_PARAMETERS_START(1, 4)
1409 		Z_PARAM_PATH_STR(inputfile)
1410 		Z_PARAM_OPTIONAL
1411 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1412 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1413 		Z_PARAM_BOOL(use_include_path)
1414 	ZEND_PARSE_PARAMETERS_END();
1415 
1416 	TIDY_SET_CONTEXT;
1417 	obj = Z_TIDY_P(object);
1418 
1419 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1420 		php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1421 		RETURN_FALSE;
1422 	}
1423 
1424 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1425 		zend_string_release_ex(contents, 0);
1426 		zend_value_error("File content is too long");
1427 		RETURN_THROWS();
1428 	}
1429 
1430 	RETVAL_BOOL(php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) == SUCCESS
1431 				&& php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == SUCCESS);
1432 
1433 	zend_string_release_ex(contents, 0);
1434 }
1435 
PHP_METHOD(tidy,parseString)1436 PHP_METHOD(tidy, parseString)
1437 {
1438 	char *enc = NULL;
1439 	size_t enc_len = 0;
1440 	HashTable *options_ht = NULL;
1441 	PHPTidyObj *obj;
1442 	zend_string *input, *options_str = NULL;
1443 
1444 	ZEND_PARSE_PARAMETERS_START(1, 3)
1445 		Z_PARAM_STR(input)
1446 		Z_PARAM_OPTIONAL
1447 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1448 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1449 	ZEND_PARSE_PARAMETERS_END();
1450 
1451 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1452 		zend_argument_value_error(1, "is too long");
1453 		RETURN_THROWS();
1454 	}
1455 
1456 	TIDY_SET_CONTEXT;
1457 	obj = Z_TIDY_P(object);
1458 
1459 	RETURN_BOOL(php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) == SUCCESS
1460 				&& php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS);
1461 }
1462 
1463 
1464 /* {{{ Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1465 PHP_FUNCTION(tidy_get_root)
1466 {
1467 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1468 }
1469 /* }}} */
1470 
1471 /* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1472 PHP_FUNCTION(tidy_get_html)
1473 {
1474 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1475 }
1476 /* }}} */
1477 
1478 /* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1479 PHP_FUNCTION(tidy_get_head)
1480 {
1481 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1482 }
1483 /* }}} */
1484 
1485 /* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1486 PHP_FUNCTION(tidy_get_body)
1487 {
1488 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1489 }
1490 /* }}} */
1491 
1492 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1493 PHP_METHOD(tidyNode, hasChildren)
1494 {
1495 	TIDY_FETCH_ONLY_OBJECT;
1496 
1497 	if (tidyGetChild(obj->node)) {
1498 		RETURN_TRUE;
1499 	} else {
1500 		RETURN_FALSE;
1501 	}
1502 }
1503 /* }}} */
1504 
1505 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1506 PHP_METHOD(tidyNode, hasSiblings)
1507 {
1508 	TIDY_FETCH_ONLY_OBJECT;
1509 
1510 	if (obj->node && tidyGetNext(obj->node)) {
1511 		RETURN_TRUE;
1512 	} else {
1513 		RETURN_FALSE;
1514 	}
1515 }
1516 /* }}} */
1517 
1518 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1519 PHP_METHOD(tidyNode, isComment)
1520 {
1521 	TIDY_FETCH_ONLY_OBJECT;
1522 
1523 	if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1524 		RETURN_TRUE;
1525 	} else {
1526 		RETURN_FALSE;
1527 	}
1528 }
1529 /* }}} */
1530 
1531 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1532 PHP_METHOD(tidyNode, isHtml)
1533 {
1534 	TIDY_FETCH_ONLY_OBJECT;
1535 
1536 	switch (tidyNodeGetType(obj->node)) {
1537 		case TidyNode_Start:
1538 		case TidyNode_End:
1539 		case TidyNode_StartEnd:
1540 			RETURN_TRUE;
1541 		default:
1542 			RETURN_FALSE;
1543 	}
1544 }
1545 /* }}} */
1546 
1547 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1548 PHP_METHOD(tidyNode, isText)
1549 {
1550 	TIDY_FETCH_ONLY_OBJECT;
1551 
1552 	if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1553 		RETURN_TRUE;
1554 	} else {
1555 		RETURN_FALSE;
1556 	}
1557 }
1558 /* }}} */
1559 
1560 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1561 PHP_METHOD(tidyNode, isJste)
1562 {
1563 	TIDY_FETCH_ONLY_OBJECT;
1564 
1565 	if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1566 		RETURN_TRUE;
1567 	} else {
1568 		RETURN_FALSE;
1569 	}
1570 }
1571 /* }}} */
1572 
1573 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1574 PHP_METHOD(tidyNode, isAsp)
1575 {
1576 	TIDY_FETCH_ONLY_OBJECT;
1577 
1578 	if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1579 		RETURN_TRUE;
1580 	} else {
1581 		RETURN_FALSE;
1582 	}
1583 }
1584 /* }}} */
1585 
1586 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1587 PHP_METHOD(tidyNode, isPhp)
1588 {
1589 	TIDY_FETCH_ONLY_OBJECT;
1590 
1591 	if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1592 		RETURN_TRUE;
1593 	} else {
1594 		RETURN_FALSE;
1595 	}
1596 }
1597 /* }}} */
1598 
1599 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1600 PHP_METHOD(tidyNode, getParent)
1601 {
1602 	TIDY_FETCH_ONLY_OBJECT;
1603 
1604 	TidyNode parent_node = tidyGetParent(obj->node);
1605 	if (parent_node) {
1606 		tidy_create_node_object(return_value, obj->ptdoc, parent_node);
1607 	}
1608 }
1609 /* }}} */
1610 
PHP_METHOD(tidyNode,getPreviousSibling)1611 PHP_METHOD(tidyNode, getPreviousSibling)
1612 {
1613 	TIDY_FETCH_ONLY_OBJECT;
1614 
1615 	TidyNode previous_node = tidyGetPrev(obj->node);
1616 	if (previous_node) {
1617 		tidy_create_node_object(return_value, obj->ptdoc, previous_node);
1618 	}
1619 }
1620 
PHP_METHOD(tidyNode,getNextSibling)1621 PHP_METHOD(tidyNode, getNextSibling)
1622 {
1623 	TIDY_FETCH_ONLY_OBJECT;
1624 
1625 	TidyNode next_node = tidyGetNext(obj->node);
1626 	if (next_node) {
1627 		tidy_create_node_object(return_value, obj->ptdoc, next_node);
1628 	}
1629 }
1630 
1631 /* {{{ __constructor for tidyNode. */
PHP_METHOD(tidyNode,__construct)1632 PHP_METHOD(tidyNode, __construct)
1633 {
1634 	zend_throw_error(NULL, "You should not create a tidyNode manually");
1635 }
1636 /* }}} */
1637 
1638 #endif
1639