xref: /PHP-8.2/ext/tidy/tidy.c (revision 8de7ccb2)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Author: John Coggeshall <john@php.net>                               |
14   +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include "php.h"
22 #include "php_tidy.h"
23 
24 #ifdef HAVE_TIDY
25 
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28 
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34 
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40 
41 #include "tidy_arginfo.h"
42 
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47 
48 /* {{{ ext/tidy macros */
/* Overwrite the last byte of a non-empty TidyBuffer with a NUL terminator. */
#define FIX_BUFFER(bptr) \
	do { \
		if ((bptr)->size) { \
			(bptr)->bp[(bptr)->size - 1] = '\0'; \
		} \
	} while (0)

/* Declares `object` and points it at the current $this zval. */
#define TIDY_SET_CONTEXT \
	zval *object = getThis();

/* Declares `obj` and `object`, parses a single tidy_ce_doc object argument
 * via zend_parse_method_parameters() and stores its PHPTidyObj in `obj`.
 * Returns from the calling function if parsing fails. */
#define TIDY_FETCH_OBJECT \
	PHPTidyObj *obj; \
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) == FAILURE) { \
		RETURN_THROWS(); \
	} \
	obj = Z_TIDY_P(object);

/* Same as TIDY_FETCH_OBJECT, but additionally throws an Error and returns
 * when the document has not been marked as initialized. */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}

/* Declares `obj` and `object` for a call that takes no arguments and
 * resolves the PHPTidyObj from $this. */
#define TIDY_FETCH_ONLY_OBJECT \
	PHPTidyObj *obj; \
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() == FAILURE) { \
		RETURN_THROWS(); \
	} \
	obj = Z_TIDY_P(object);

/* Load the file named by the tidy.default_config INI setting into _doc,
 * when that setting is a non-empty string. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		php_tidy_load_config(_doc, TG(default_config)); \
	}
81 /* }}} */
82 
83 /* {{{ ext/tidy structs */
84 typedef struct _PHPTidyDoc PHPTidyDoc;
85 typedef struct _PHPTidyObj PHPTidyObj;
86 
87 typedef enum {
88 	is_node,
89 	is_doc
90 } tidy_obj_type;
91 
92 typedef enum {
93 	is_root_node,
94 	is_html_node,
95 	is_head_node,
96 	is_body_node
97 } tidy_base_nodetypes;
98 
99 struct _PHPTidyDoc {
100 	TidyDoc			doc;
101 	TidyBuffer		*errbuf;
102 	unsigned int	ref_count;
103 	unsigned int    initialized:1;
104 };
105 
106 struct _PHPTidyObj {
107 	TidyNode		node;
108 	tidy_obj_type	type;
109 	PHPTidyDoc		*ptdoc;
110 	zend_object		std;
111 };
112 
php_tidy_fetch_object(zend_object * obj)113 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
114 	return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
115 }
116 
117 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
118 /* }}} */
119 
120 /* {{{ ext/tidy prototypes */
121 static zend_string *php_tidy_file_to_mem(char *, bool);
122 static void tidy_object_free_storage(zend_object *);
123 static zend_object *tidy_object_new_node(zend_class_entry *);
124 static zend_object *tidy_object_new_doc(zend_class_entry *);
125 static zval *tidy_instantiate(zend_class_entry *, zval *);
126 static zend_result tidy_doc_cast_handler(zend_object *, zval *, int);
127 static zend_result tidy_node_cast_handler(zend_object *, zval *, int);
128 static void tidy_doc_update_properties(PHPTidyObj *);
129 static void tidy_add_node_default_properties(PHPTidyObj *);
130 static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
131 static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
132 static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
133 static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
134 static PHP_INI_MH(php_tidy_set_clean_output);
135 static void php_tidy_clean_output_start(const char *name, size_t name_len);
136 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
137 static int php_tidy_output_handler(void **nothing, php_output_context *output_context);
138 
139 static PHP_MINIT_FUNCTION(tidy);
140 static PHP_MSHUTDOWN_FUNCTION(tidy);
141 static PHP_RINIT_FUNCTION(tidy);
142 static PHP_RSHUTDOWN_FUNCTION(tidy);
143 static PHP_MINFO_FUNCTION(tidy);
144 
145 ZEND_DECLARE_MODULE_GLOBALS(tidy)
146 
147 PHP_INI_BEGIN()
148 STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
149 STD_PHP_INI_BOOLEAN("tidy.clean_output",	"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
150 PHP_INI_END()
151 
152 static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
153 
154 static zend_object_handlers tidy_object_handlers_doc;
155 static zend_object_handlers tidy_object_handlers_node;
156 
157 zend_module_entry tidy_module_entry = {
158 	STANDARD_MODULE_HEADER,
159 	"tidy",
160 	ext_functions,
161 	PHP_MINIT(tidy),
162 	PHP_MSHUTDOWN(tidy),
163 	PHP_RINIT(tidy),
164 	PHP_RSHUTDOWN(tidy),
165 	PHP_MINFO(tidy),
166 	PHP_TIDY_VERSION,
167 	PHP_MODULE_GLOBALS(tidy),
168 	NULL,
169 	NULL,
170 	NULL,
171 	STANDARD_MODULE_PROPERTIES_EX
172 };
173 
174 #ifdef COMPILE_DL_TIDY
175 #ifdef ZTS
176 ZEND_TSRMLS_CACHE_DEFINE()
177 #endif
ZEND_GET_MODULE(tidy)178 ZEND_GET_MODULE(tidy)
179 #endif
180 
181 static void* TIDY_CALL php_tidy_malloc(size_t len)
182 {
183 	return emalloc(len);
184 }
185 
php_tidy_realloc(void * buf,size_t len)186 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
187 {
188 	return erealloc(buf, len);
189 }
190 
php_tidy_free(void * buf)191 static void TIDY_CALL php_tidy_free(void *buf)
192 {
193 	efree(buf);
194 }
195 
php_tidy_panic(ctmbstr msg)196 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
197 {
198 	php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
199 }
200 
/* Load the tidy configuration file at `path` into `doc`.
 * A negative tidyLoadConfig() result raises a warning, a positive one a
 * notice; zero is silent. */
static void php_tidy_load_config(TidyDoc doc, const char *path)
{
	const int status = tidyLoadConfig(doc, path);

	if (status == 0) {
		return;
	}

	if (status < 0) {
		php_error_docref(NULL, E_WARNING, "Could not load the Tidy configuration file \"%s\"", path);
	} else {
		php_error_docref(NULL, E_NOTICE, "There were errors while parsing the Tidy configuration file \"%s\"", path);
	}
}
210 
php_tidy_apply_config(TidyDoc doc,zend_string * str_string,HashTable * ht_options)211 static zend_result php_tidy_apply_config(TidyDoc doc, zend_string *str_string, HashTable *ht_options)
212 {
213 	if (ht_options) {
214 		return _php_tidy_apply_config_array(doc, ht_options);
215 	} else if (str_string) {
216 		if (php_check_open_basedir(ZSTR_VAL(str_string))) {
217 			return FAILURE;
218 		}
219 		php_tidy_load_config(doc, ZSTR_VAL(str_string));
220 	}
221 	return SUCCESS;
222 }
223 
/* Set the tidy option named `optname` on `doc` from a PHP value.
 * The value is converted according to the option's type (string, integer
 * or boolean). Warns and fails for unknown or read-only options and for
 * option types other than those three.
 * Returns SUCCESS when libtidy accepted the value, FAILURE otherwise. */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	TidyOption option = tidyGetOptionByName(doc, optname);
	bool accepted = false;

	if (!option) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *tmp;
			zend_string *text = zval_get_tmp_string(value, &tmp);

			if (tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text))) {
				accepted = true;
			}
			zend_tmp_string_release(tmp);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				accepted = true;
			}
			break;
		}

		case TidyBoolean: {
			/* Booleans go through the integer conversion of the value. */
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				accepted = true;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return accepted ? SUCCESS : FAILURE;
}
271 
/* Shared implementation of tidy_repair_string() and tidy_repair_file().
 *
 * Parses the PHP arguments, builds a throw-away tidy document, applies the
 * default and user configuration plus the optional encoding, then parses,
 * cleans and serializes the input into return_value.
 *
 * is_file: when true the first argument is a path whose contents are read
 *          through php_tidy_file_to_mem(); otherwise it is the markup itself.
 *
 * return_value is the repaired string, or false when the file cannot be read,
 * the configuration or encoding cannot be applied, or libtidy reports a
 * parse/repair failure. An input longer than UINT_MAX throws a ValueError. */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	TidyBuffer buf;
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;

	if (is_file) {
		bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			/* Accept null for the encoding, as tidy_parse_string()/tidy_parse_file() do. */
			Z_PARAM_STRING_OR_NULL(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING_OR_NULL(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		data = arg1;
	}

	/* The length is handed to tidyBufAttach() as a 32-bit value below. */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (is_file) {
			zend_string_release_ex(data, false);
			zend_argument_value_error(1, "Input string is too long");
		} else {
			zend_argument_value_error(1, "is too long");
		}
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/* A configuration or encoding failure must end the call with false.
	 * Previously the false set here was overwritten by the parse below. */
	if (php_tidy_apply_config(doc, config_str, config_ht) != SUCCESS) {
		RETVAL_FALSE;
		goto cleanup;
	}

	if (enc_len && tidySetCharEncoding(doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		RETVAL_FALSE;
		goto cleanup;
	}

	/* `data` is never NULL here: the file branch returned early on failure
	 * and the string branch uses the parsed argument. The buffer only
	 * borrows the string's memory. */
	tidyBufInit(&buf);
	tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

	if (tidyParseBuffer(doc, &buf) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
		RETVAL_FALSE;
	} else if (tidyCleanAndRepair(doc) >= 0) {
		TidyBuffer output;
		tidyBufInit(&output);

		tidySaveBuffer (doc, &output);
		FIX_BUFFER(&output);
		RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
		tidyBufFree(&output);
	} else {
		RETVAL_FALSE;
	}

cleanup:
	/* Only the file branch owns `data`. */
	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
373 
php_tidy_file_to_mem(char * filename,bool use_include_path)374 static zend_string *php_tidy_file_to_mem(char *filename, bool use_include_path)
375 {
376 	php_stream *stream;
377 	zend_string *data = NULL;
378 
379 	if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
380 		return NULL;
381 	}
382 	if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
383 		data = ZSTR_EMPTY_ALLOC();
384 	}
385 	php_stream_close(stream);
386 
387 	return data;
388 }
389 
/* free_obj handler for tidy and tidyNode objects.
 * Destroys the standard object part, drops this object's reference to the
 * shared PHPTidyDoc, and frees the document once no references remain. */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&intern->std);

	shared = intern->ptdoc;
	if (shared == NULL) {
		return;
	}

	shared->ref_count--;
	if (shared->ref_count > 0) {
		/* Still referenced by other objects. */
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
407 
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)408 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
409 {
410 	PHPTidyObj *intern;
411 
412 	intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
413 	zend_object_std_init(&intern->std, class_type);
414 	object_properties_init(&intern->std, class_type);
415 
416 	switch(objtype) {
417 		case is_node:
418 			break;
419 
420 		case is_doc:
421 			intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
422 			intern->ptdoc->doc = tidyCreate();
423 			intern->ptdoc->ref_count = 1;
424 			intern->ptdoc->initialized = 0;
425 			intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
426 			tidyBufInit(intern->ptdoc->errbuf);
427 
428 			if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
429 				tidyBufFree(intern->ptdoc->errbuf);
430 				efree(intern->ptdoc->errbuf);
431 				tidyRelease(intern->ptdoc->doc);
432 				efree(intern->ptdoc);
433 				efree(intern);
434 				php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
435 			}
436 
437 			tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
438 			tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
439 
440 			TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
441 			break;
442 	}
443 
444 	intern->std.handlers = handlers;
445 
446 	return &intern->std;
447 }
448 
tidy_object_new_node(zend_class_entry * class_type)449 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
450 {
451 	return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
452 }
453 
tidy_object_new_doc(zend_class_entry * class_type)454 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
455 {
456 	return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
457 }
458 
/* Initialize `object` as a new instance of `pce` and hand the same zval back.
 * The result of object_init_ex() is not inspected. */
static zval *tidy_instantiate(zend_class_entry *pce, zval *object)
{
	zval *target = object;

	object_init_ex(target, pce);
	return target;
}
464 
tidy_doc_cast_handler(zend_object * in,zval * out,int type)465 static zend_result tidy_doc_cast_handler(zend_object *in, zval *out, int type)
466 {
467 	TidyBuffer output;
468 	PHPTidyObj *obj;
469 
470 	switch (type) {
471 		case IS_LONG:
472 		case _IS_NUMBER:
473 			ZVAL_LONG(out, 0);
474 			break;
475 
476 		case IS_DOUBLE:
477 			ZVAL_DOUBLE(out, 0);
478 			break;
479 
480 		case _IS_BOOL:
481 			ZVAL_TRUE(out);
482 			break;
483 
484 		case IS_STRING:
485 			obj = php_tidy_fetch_object(in);
486 			tidyBufInit(&output);
487 			tidySaveBuffer (obj->ptdoc->doc, &output);
488 			if (output.size) {
489 				ZVAL_STRINGL(out, (char *) output.bp, output.size-1);
490 			} else {
491 				ZVAL_EMPTY_STRING(out);
492 			}
493 			tidyBufFree(&output);
494 			break;
495 
496 		default:
497 			return FAILURE;
498 	}
499 
500 	return SUCCESS;
501 }
502 
tidy_node_cast_handler(zend_object * in,zval * out,int type)503 static zend_result tidy_node_cast_handler(zend_object *in, zval *out, int type)
504 {
505 	TidyBuffer buf;
506 	PHPTidyObj *obj;
507 
508 	switch(type) {
509 		case IS_LONG:
510 		case _IS_NUMBER:
511 			ZVAL_LONG(out, 0);
512 			break;
513 
514 		case IS_DOUBLE:
515 			ZVAL_DOUBLE(out, 0);
516 			break;
517 
518 		case _IS_BOOL:
519 			ZVAL_TRUE(out);
520 			break;
521 
522 		case IS_STRING:
523 			obj = php_tidy_fetch_object(in);
524 			tidyBufInit(&buf);
525 			if (obj->ptdoc) {
526 				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
527 				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
528 			} else {
529 				ZVAL_EMPTY_STRING(out);
530 			}
531 			tidyBufFree(&buf);
532 			break;
533 
534 		default:
535 			return FAILURE;
536 	}
537 
538 	return SUCCESS;
539 }
540 
/* Refresh the `value` and `errorBuffer` properties of a tidy document object
 * from the current libtidy state. Each property is only written when the
 * corresponding buffer is non-empty; the final byte of each buffer is dropped. */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	PHPTidyDoc *ptdoc = obj->ptdoc;
	zend_object *zobj = &obj->std;
	TidyBuffer markup;

	tidyBufInit(&markup);
	tidySaveBuffer (ptdoc->doc, &markup);

	if (markup.size) {
		zend_update_property_stringl(tidy_ce_doc, zobj, ZEND_STRL("value"), (char*) markup.bp, markup.size-1);
	}

	tidyBufFree(&markup);

	if (ptdoc->errbuf->size) {
		zend_update_property_stringl(tidy_ce_doc, zobj, ZEND_STRL("errorBuffer"), (char*) ptdoc->errbuf->bp, ptdoc->errbuf->size-1);
	}
}
572 
/* Populate the properties of a tidyNode object from its libtidy node:
 * value, name, type, line, column, proprietary, id, attribute and child.
 * `attribute` is an array of name => value pairs, or null when the node has
 * no attributes. `child` is an array of tidyNode objects, built recursively
 * and sharing this node's document, or null when there are no children. */
static void tidy_add_node_default_properties(PHPTidyObj *obj)
{
	zend_object *zobj = &obj->std;
	TidyBuffer text;
	TidyAttr attr;
	TidyNode kid;
	zval attrs, kids;
	char *node_name;

	/* value: node text without the trailing byte, or "" when there is none. */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
	zend_update_property_stringl(tidy_ce_node, zobj, ZEND_STRL("value"),
		text.size ? (char *) text.bp : "",
		text.size ? text.size - 1 : 0);
	tidyBufFree(&text);

	node_name = (char *) tidyNodeGetName(obj->node);
	zend_update_property_string(tidy_ce_node, zobj, ZEND_STRL("name"), node_name ? node_name : "");

	zend_update_property_long(tidy_ce_node, zobj, ZEND_STRL("type"), tidyNodeGetType(obj->node));
	zend_update_property_long(tidy_ce_node, zobj, ZEND_STRL("line"), tidyNodeLine(obj->node));
	zend_update_property_long(tidy_ce_node, zobj, ZEND_STRL("column"), tidyNodeColumn(obj->node));
	zend_update_property_bool(tidy_ce_node, zobj, ZEND_STRL("proprietary"), tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	/* id: null for root, doctype, text and comment nodes. */
	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			zend_update_property_null(tidy_ce_node, zobj, ZEND_STRL("id"));
			break;

		default:
			zend_update_property_long(tidy_ce_node, zobj, ZEND_STRL("id"), tidyNodeGetId(obj->node));
	}

	/* attribute */
	attr = tidyAttrFirst(obj->node);
	if (!attr) {
		ZVAL_NULL(&attrs);
	} else {
		array_init(&attrs);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *)tidyAttrName(attr);
			char *attr_val = (char *)tidyAttrValue(attr);

			if (!attr_name) {
				continue;
			}
			if (attr_val) {
				add_assoc_string(&attrs, attr_name, attr_val);
			} else {
				/* Attribute without a value maps to the empty string. */
				add_assoc_str(&attrs, attr_name, zend_empty_string);
			}
		}
	}

	zend_update_property(tidy_ce_node, zobj, ZEND_STRL("attribute"), &attrs);
	zval_ptr_dtor(&attrs);

	/* child */
	kid = tidyGetChild(obj->node);
	if (!kid) {
		ZVAL_NULL(&kids);
	} else {
		array_init(&kids);

		for (; kid; kid = tidyGetNext(kid)) {
			zval kid_zv;
			PHPTidyObj *kid_obj;

			tidy_instantiate(tidy_ce_node, &kid_zv);
			kid_obj = Z_TIDY_P(&kid_zv);
			kid_obj->node = kid;
			kid_obj->type = is_node;
			/* Each child keeps the shared document alive. */
			kid_obj->ptdoc = obj->ptdoc;
			kid_obj->ptdoc->ref_count++;

			tidy_add_node_default_properties(kid_obj);
			add_next_index_zval(&kids, &kid_zv);
		}
	}

	zend_update_property(tidy_ce_node, zobj, ZEND_STRL("child"), &kids);
	zval_ptr_dtor(&kids);
}
723 
/* Fetch the current value of option `opt` from `ptdoc`.
 * The option type is written to *type. The return value depends on it:
 *   TidyString  - a newly created zend_string (empty when unset);
 *   TidyInteger - the integer value carried in the pointer;
 *   TidyBoolean - the boolean value carried in the pointer.
 * Returns NULL for any other option type. */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	*type = tidyOptGetType(opt);

	switch (*type) {
		case TidyString: {
			char *val = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));
			if (val) {
				return (void *) zend_string_init(val, strlen(val), 0);
			}
			return (void *) ZSTR_EMPTY_ALLOC();
		}

		case TidyInteger:
			return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));

		case TidyBoolean:
			/* Go through uintptr_t like the integer case, rather than
			 * casting the enum value straight to a pointer. */
			return (void *) (uintptr_t) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
751 
/* Shared implementation for the root/html/head/body node accessors.
 * Looks up the node selected by `node_type` in the document and returns a
 * tidyNode object for it in return_value, or null when libtidy has no such
 * node. The new node object shares the document and takes a reference on it. */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	TidyNode node;
	PHPTidyObj *node_obj;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_root_node:
			node = tidyGetRoot(obj->ptdoc->doc);
			break;

		case is_html_node:
			node = tidyGetHtml(obj->ptdoc->doc);
			break;

		case is_head_node:
			node = tidyGetHead(obj->ptdoc->doc);
			break;

		case is_body_node:
			node = tidyGetBody(obj->ptdoc->doc);
			break;

		EMPTY_SWITCH_DEFAULT_CASE()
	}

	if (!node) {
		RETURN_NULL();
	}

	tidy_instantiate(tidy_ce_node, return_value);
	node_obj = Z_TIDY_P(return_value);
	node_obj->node  = node;
	node_obj->type  = is_node;
	node_obj->ptdoc = obj->ptdoc;
	node_obj->ptdoc->ref_count++;

	tidy_add_node_default_properties(node_obj);
}
791 
/* Apply every string-keyed entry of `ht_options` as a tidy option on `doc`.
 * Packed arrays and entries without a string key are ignored. Failures of
 * individual options are not propagated: the function always returns SUCCESS. */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zend_string *key;
	zval *entry;

	if (HT_IS_PACKED(ht_options)) {
		return SUCCESS;
	}

	ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(ht_options, key, entry) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), entry);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
807 
/* Parse `len` bytes of markup at `string` into the document owned by `obj`.
 * When `enc` is non-NULL it is set as the character encoding first; failure
 * to do so warns and aborts before the document is marked initialized.
 * On a successful parse the object's properties are refreshed.
 * Returns SUCCESS or FAILURE. */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	PHPTidyDoc *ptdoc = obj->ptdoc;
	TidyBuffer input;

	if (enc && tidySetCharEncoding(ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	ptdoc->initialized = 1;

	/* The buffer is attached to the caller's memory, not copied. */
	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(ptdoc->doc, &input) >= 0) {
		tidy_doc_update_properties(obj);
		return SUCCESS;
	}

	php_error_docref(NULL, E_WARNING, "%s", ptdoc->errbuf->bp);
	return FAILURE;
}
831 
PHP_MINIT_FUNCTION(tidy)832 static PHP_MINIT_FUNCTION(tidy)
833 {
834 	tidySetMallocCall(php_tidy_malloc);
835 	tidySetReallocCall(php_tidy_realloc);
836 	tidySetFreeCall(php_tidy_free);
837 	tidySetPanicCall(php_tidy_panic);
838 
839 	REGISTER_INI_ENTRIES();
840 
841 	tidy_ce_doc = register_class_tidy();
842 	tidy_ce_doc->create_object = tidy_object_new_doc;
843 	memcpy(&tidy_object_handlers_doc, &std_object_handlers, sizeof(zend_object_handlers));
844 	tidy_object_handlers_doc.clone_obj = NULL;
845 
846 	tidy_ce_node = register_class_tidyNode();
847 	tidy_ce_node->create_object = tidy_object_new_node;
848 	memcpy(&tidy_object_handlers_node, &std_object_handlers, sizeof(zend_object_handlers));
849 	tidy_object_handlers_node.clone_obj = NULL;
850 
851 	tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
852 	tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
853 
854 	tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
855 	tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
856 
857 	register_tidy_symbols(module_number);
858 
859 	php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
860 
861 	return SUCCESS;
862 }
863 
PHP_RINIT_FUNCTION(tidy)864 static PHP_RINIT_FUNCTION(tidy)
865 {
866 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
867 	ZEND_TSRMLS_CACHE_UPDATE();
868 #endif
869 
870 	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
871 
872 	return SUCCESS;
873 }
874 
PHP_RSHUTDOWN_FUNCTION(tidy)875 static PHP_RSHUTDOWN_FUNCTION(tidy)
876 {
877 	TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
878 
879 	return SUCCESS;
880 }
881 
PHP_MSHUTDOWN_FUNCTION(tidy)882 static PHP_MSHUTDOWN_FUNCTION(tidy)
883 {
884 	UNREGISTER_INI_ENTRIES();
885 	return SUCCESS;
886 }
887 
/* phpinfo() section: support flag, library version (and release date when
 * available at build time), followed by the INI entries. */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");

	/* Which version call exists depends on the header the build found. */
#ifdef HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
#elif defined(HAVE_TIDYP_H)
	php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
#endif

#ifdef HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
904 
PHP_INI_MH(php_tidy_set_clean_output)905 static PHP_INI_MH(php_tidy_set_clean_output)
906 {
907 	int status;
908 	bool value;
909 
910 	value = zend_ini_parse_bool(new_value);
911 
912 	if (stage == PHP_INI_STAGE_RUNTIME) {
913 		status = php_output_get_status();
914 
915 		if (value && (status & PHP_OUTPUT_WRITTEN)) {
916 			php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
917 			return FAILURE;
918 		}
919 		if (status & PHP_OUTPUT_SENT) {
920 			php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
921 			return FAILURE;
922 		}
923 	}
924 
925 	status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
926 
927 	if (stage == PHP_INI_STAGE_RUNTIME && value) {
928 		if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
929 			php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
930 		}
931 	}
932 
933 	return status;
934 }
935 
936 /*
937  * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
938  */
939 
php_tidy_clean_output_start(const char * name,size_t name_len)940 static void php_tidy_clean_output_start(const char *name, size_t name_len)
941 {
942 	php_output_handler *h;
943 
944 	if (TG(clean_output) && (h = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS))) {
945 		php_output_handler_start(h);
946 	}
947 }
948 
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)949 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
950 {
951 	if (chunk_size) {
952 		php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
953 		return NULL;
954 	}
955 	if (!TG(clean_output)) {
956 		TG(clean_output) = 1;
957 	}
958 	return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
959 }
960 
/* Output handler behind ob_tidyhandler / tidy.clean_output.
 * Only acts when clean_output is on and the invocation carries both the
 * START and FINAL flags. The input is then parsed, cleaned and serialized
 * with a temporary tidy document, and the result is handed to the output
 * layer with out.free set.
 * Returns SUCCESS when output was produced, FAILURE otherwise. */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (!TG(clean_output)
	 || !(output_context->op & PHP_OUTPUT_HANDLER_START)
	 || !(output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		return status;
	}

	/* Reject oversized input before allocating anything. This check used to
	 * return after the document and error buffer were created, leaking both. */
	if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		return status;
	}

	doc = tidyCreate();
	tidyBufInit(&errbuf);

	if (0 == tidySetErrorBuffer(doc, &errbuf)) {
		tidyOptSetBool(doc, TidyForceOutput, yes);
		tidyOptSetBool(doc, TidyMark, no);

		TIDY_SET_DEFAULT_CONFIG(doc);

		/* inbuf only borrows the output layer's data. */
		tidyBufInit(&inbuf);
		tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);

		if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
			tidyBufInit(&outbuf);
			tidySaveBuffer(doc, &outbuf);
			FIX_BUFFER(&outbuf);
			/* outbuf is not freed here: out.free tells the output layer to release it. */
			output_context->out.data = (char *) outbuf.bp;
			output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
			output_context->out.free = 1;
			status = SUCCESS;
		}
	}

	tidyRelease(doc);
	tidyBufFree(&errbuf);

	return status;
}
1002 
1003 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1004 PHP_FUNCTION(tidy_parse_string)
1005 {
1006 	char *enc = NULL;
1007 	size_t enc_len = 0;
1008 	zend_string *input, *options_str = NULL;
1009 	HashTable *options_ht = NULL;
1010 	PHPTidyObj *obj;
1011 
1012 	ZEND_PARSE_PARAMETERS_START(1, 3)
1013 		Z_PARAM_STR(input)
1014 		Z_PARAM_OPTIONAL
1015 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1016 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1017 	ZEND_PARSE_PARAMETERS_END();
1018 
1019 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1020 		zend_argument_value_error(1, "is too long");
1021 		RETURN_THROWS();
1022 	}
1023 
1024 	tidy_instantiate(tidy_ce_doc, return_value);
1025 	obj = Z_TIDY_P(return_value);
1026 
1027 	if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1028 	 || php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1029 		zval_ptr_dtor(return_value);
1030 		RETURN_FALSE;
1031 	}
1032 }
1033 /* }}} */
1034 
1035 /* {{{ Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1036 PHP_FUNCTION(tidy_get_error_buffer)
1037 {
1038 	TIDY_FETCH_OBJECT;
1039 
1040 	if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1041 		RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1042 	} else {
1043 		RETURN_FALSE;
1044 	}
1045 }
1046 /* }}} */
1047 
1048 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1049 PHP_FUNCTION(tidy_get_output)
1050 {
1051 	TidyBuffer output;
1052 	TIDY_FETCH_OBJECT;
1053 
1054 	tidyBufInit(&output);
1055 	tidySaveBuffer(obj->ptdoc->doc, &output);
1056 	FIX_BUFFER(&output);
1057 	RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1058 	tidyBufFree(&output);
1059 }
1060 /* }}} */
1061 
1062 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1063 PHP_FUNCTION(tidy_parse_file)
1064 {
1065 	char *enc = NULL;
1066 	size_t enc_len = 0;
1067 	bool use_include_path = 0;
1068 	zend_string *inputfile, *contents, *options_str = NULL;
1069 	HashTable *options_ht = NULL;
1070 
1071 	PHPTidyObj *obj;
1072 
1073 	ZEND_PARSE_PARAMETERS_START(1, 4)
1074 		Z_PARAM_PATH_STR(inputfile)
1075 		Z_PARAM_OPTIONAL
1076 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1077 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1078 		Z_PARAM_BOOL(use_include_path)
1079 	ZEND_PARSE_PARAMETERS_END();
1080 
1081 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1082 		php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1083 		RETURN_FALSE;
1084 	}
1085 
1086 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1087 		zend_string_release_ex(contents, 0);
1088 		zend_value_error("Input string is too long");
1089 		RETURN_THROWS();
1090 	}
1091 
1092 	tidy_instantiate(tidy_ce_doc, return_value);
1093 	obj = Z_TIDY_P(return_value);
1094 
1095 	if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1096 	 || php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1097 		zval_ptr_dtor(return_value);
1098 		RETVAL_FALSE;
1099 	}
1100 
1101 	zend_string_release_ex(contents, 0);
1102 }
1103 /* }}} */
1104 
1105 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1106 PHP_FUNCTION(tidy_clean_repair)
1107 {
1108 	TIDY_FETCH_OBJECT;
1109 
1110 	if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1111 		tidy_doc_update_properties(obj);
1112 		RETURN_TRUE;
1113 	}
1114 
1115 	RETURN_FALSE;
1116 }
1117 /* }}} */
1118 
1119 /* {{{ Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1120 PHP_FUNCTION(tidy_repair_string)
1121 {
1122 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1123 }
1124 /* }}} */
1125 
1126 /* {{{ Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1127 PHP_FUNCTION(tidy_repair_file)
1128 {
1129 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1130 }
1131 /* }}} */
1132 
1133 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1134 PHP_FUNCTION(tidy_diagnose)
1135 {
1136 	TIDY_FETCH_OBJECT;
1137 
1138 	if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1139 		tidy_doc_update_properties(obj);
1140 		RETURN_TRUE;
1141 	}
1142 
1143 	RETURN_FALSE;
1144 }
1145 /* }}} */
1146 
1147 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1148 PHP_FUNCTION(tidy_get_release)
1149 {
1150 	if (zend_parse_parameters_none() == FAILURE) {
1151 		RETURN_THROWS();
1152 	}
1153 
1154 #ifdef HAVE_TIDYRELEASEDATE
1155 	RETURN_STRING((char *)tidyReleaseDate());
1156 #else
1157 	RETURN_STRING((char *)"unknown");
1158 #endif
1159 }
1160 /* }}} */
1161 
1162 
1163 #ifdef HAVE_TIDYOPTGETDOC
1164 /* {{{ Returns the documentation for the given option name */
PHP_FUNCTION(tidy_get_opt_doc)1165 PHP_FUNCTION(tidy_get_opt_doc)
1166 {
1167 	PHPTidyObj *obj;
1168 	char *optval, *optname;
1169 	size_t optname_len;
1170 	TidyOption opt;
1171 	zval *object;
1172 
1173 	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1174 		RETURN_THROWS();
1175 	}
1176 
1177 	obj = Z_TIDY_P(object);
1178 
1179 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1180 
1181 	if (!opt) {
1182 		zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1183 		RETURN_THROWS();
1184 	}
1185 
1186 	if ( (optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt)) ) {
1187 		RETURN_STRING(optval);
1188 	}
1189 
1190 	RETURN_FALSE;
1191 }
1192 /* }}} */
1193 #endif
1194 
1195 
1196 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1197 PHP_FUNCTION(tidy_get_config)
1198 {
1199 	TidyIterator itOpt;
1200 	char *opt_name;
1201 	void *opt_value;
1202 	TidyOptionType optt;
1203 
1204 	TIDY_FETCH_OBJECT;
1205 
1206 	itOpt = tidyGetOptionList(obj->ptdoc->doc);
1207 
1208 	array_init(return_value);
1209 
1210 	while (itOpt) {
1211 		TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1212 
1213 		opt_name = (char *)tidyOptGetName(opt);
1214 		opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1215 		switch (optt) {
1216 			case TidyString:
1217 				add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1218 				break;
1219 
1220 			case TidyInteger:
1221 				add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1222 				break;
1223 
1224 			case TidyBoolean:
1225 				add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1226 				break;
1227 		}
1228 	}
1229 
1230 	return;
1231 }
1232 /* }}} */
1233 
1234 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1235 PHP_FUNCTION(tidy_get_status)
1236 {
1237 	TIDY_FETCH_OBJECT;
1238 
1239 	RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1240 }
1241 /* }}} */
1242 
1243 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1244 PHP_FUNCTION(tidy_get_html_ver)
1245 {
1246 	TIDY_FETCH_INITIALIZED_OBJECT;
1247 
1248 	RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1249 }
1250 /* }}} */
1251 
1252 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1253 PHP_FUNCTION(tidy_is_xhtml)
1254 {
1255 	TIDY_FETCH_INITIALIZED_OBJECT;
1256 
1257 	RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1258 }
1259 /* }}} */
1260 
1261 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1262 PHP_FUNCTION(tidy_is_xml)
1263 {
1264 	TIDY_FETCH_INITIALIZED_OBJECT;
1265 
1266 	RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1267 }
1268 /* }}} */
1269 
1270 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1271 PHP_FUNCTION(tidy_error_count)
1272 {
1273 	TIDY_FETCH_OBJECT;
1274 
1275 	RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1276 }
1277 /* }}} */
1278 
1279 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1280 PHP_FUNCTION(tidy_warning_count)
1281 {
1282 	TIDY_FETCH_OBJECT;
1283 
1284 	RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1285 }
1286 /* }}} */
1287 
1288 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1289 PHP_FUNCTION(tidy_access_count)
1290 {
1291 	TIDY_FETCH_OBJECT;
1292 
1293 	RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1294 }
1295 /* }}} */
1296 
1297 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1298 PHP_FUNCTION(tidy_config_count)
1299 {
1300 	TIDY_FETCH_OBJECT;
1301 
1302 	RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1303 }
1304 /* }}} */
1305 
1306 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1307 PHP_FUNCTION(tidy_getopt)
1308 {
1309 	PHPTidyObj *obj;
1310 	char *optname;
1311 	void *optval;
1312 	size_t optname_len;
1313 	TidyOption opt;
1314 	TidyOptionType optt;
1315 	zval *object;
1316 
1317 	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1318 		RETURN_THROWS();
1319 	}
1320 
1321 	obj = Z_TIDY_P(object);
1322 
1323 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1324 
1325 	if (!opt) {
1326 		zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1327 		RETURN_THROWS();
1328 	}
1329 
1330 	optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1331 	switch (optt) {
1332 		case TidyString:
1333 			RETVAL_STR((zend_string*)optval);
1334 			return;
1335 
1336 		case TidyInteger:
1337 			RETURN_LONG((zend_long)optval);
1338 			break;
1339 
1340 		case TidyBoolean:
1341 			if (optval) {
1342 				RETURN_TRUE;
1343 			} else {
1344 				RETURN_FALSE;
1345 			}
1346 			break;
1347 
1348 		default:
1349 			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1350 			break;
1351 	}
1352 
1353 	RETURN_FALSE;
1354 }
1355 /* }}} */
1356 
PHP_METHOD(tidy,__construct)1357 PHP_METHOD(tidy, __construct)
1358 {
1359 	char *enc = NULL;
1360 	size_t enc_len = 0;
1361 	bool use_include_path = 0;
1362 	HashTable *options_ht = NULL;
1363 	zend_string *contents, *inputfile = NULL, *options_str = NULL;
1364 	PHPTidyObj *obj;
1365 
1366 	ZEND_PARSE_PARAMETERS_START(0, 4)
1367 		Z_PARAM_OPTIONAL
1368 		Z_PARAM_PATH_STR_OR_NULL(inputfile)
1369 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1370 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1371 		Z_PARAM_BOOL(use_include_path)
1372 	ZEND_PARSE_PARAMETERS_END();
1373 
1374 	TIDY_SET_CONTEXT;
1375 	obj = Z_TIDY_P(object);
1376 
1377 	if (inputfile) {
1378 		if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1379 			php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1380 			return;
1381 		}
1382 
1383 		if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1384 			zend_string_release_ex(contents, 0);
1385 			zend_value_error("Input string is too long");
1386 			RETURN_THROWS();
1387 		}
1388 
1389 		if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS) {
1390 			/* TODO: this is the constructor, we should throw probably... */
1391 			zend_string_release_ex(contents, 0);
1392 			RETURN_FALSE;
1393 		}
1394 
1395 		php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1396 
1397 		zend_string_release_ex(contents, 0);
1398 	}
1399 }
1400 
PHP_METHOD(tidy,parseFile)1401 PHP_METHOD(tidy, parseFile)
1402 {
1403 	char *enc = NULL;
1404 	size_t enc_len = 0;
1405 	bool use_include_path = 0;
1406 	HashTable *options_ht = NULL;
1407 	zend_string *inputfile, *contents, *options_str = NULL;
1408 	PHPTidyObj *obj;
1409 
1410 	ZEND_PARSE_PARAMETERS_START(1, 4)
1411 		Z_PARAM_PATH_STR(inputfile)
1412 		Z_PARAM_OPTIONAL
1413 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1414 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1415 		Z_PARAM_BOOL(use_include_path)
1416 	ZEND_PARSE_PARAMETERS_END();
1417 
1418 	TIDY_SET_CONTEXT;
1419 	obj = Z_TIDY_P(object);
1420 
1421 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1422 		php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1423 		RETURN_FALSE;
1424 	}
1425 
1426 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1427 		zend_string_release_ex(contents, 0);
1428 		zend_value_error("Input string is too long");
1429 		RETURN_THROWS();
1430 	}
1431 
1432 	if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1433 	 || php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1434 		RETVAL_FALSE;
1435 	} else {
1436 		RETVAL_TRUE;
1437 	}
1438 
1439 	zend_string_release_ex(contents, 0);
1440 }
1441 
PHP_METHOD(tidy,parseString)1442 PHP_METHOD(tidy, parseString)
1443 {
1444 	char *enc = NULL;
1445 	size_t enc_len = 0;
1446 	HashTable *options_ht = NULL;
1447 	PHPTidyObj *obj;
1448 	zend_string *input, *options_str = NULL;
1449 
1450 	ZEND_PARSE_PARAMETERS_START(1, 3)
1451 		Z_PARAM_STR(input)
1452 		Z_PARAM_OPTIONAL
1453 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1454 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1455 	ZEND_PARSE_PARAMETERS_END();
1456 
1457 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1458 		zend_argument_value_error(1, "is too long");
1459 		RETURN_THROWS();
1460 	}
1461 
1462 	TIDY_SET_CONTEXT;
1463 	obj = Z_TIDY_P(object);
1464 
1465 	if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) == SUCCESS
1466 	 && php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1467 		RETURN_TRUE;
1468 	}
1469 
1470 	RETURN_FALSE;
1471 }
1472 
1473 
1474 /* {{{ Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1475 PHP_FUNCTION(tidy_get_root)
1476 {
1477 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1478 }
1479 /* }}} */
1480 
1481 /* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1482 PHP_FUNCTION(tidy_get_html)
1483 {
1484 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1485 }
1486 /* }}} */
1487 
1488 /* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1489 PHP_FUNCTION(tidy_get_head)
1490 {
1491 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1492 }
1493 /* }}} */
1494 
1495 /* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1496 PHP_FUNCTION(tidy_get_body)
1497 {
1498 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1499 }
1500 /* }}} */
1501 
1502 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1503 PHP_METHOD(tidyNode, hasChildren)
1504 {
1505 	TIDY_FETCH_ONLY_OBJECT;
1506 
1507 	if (tidyGetChild(obj->node)) {
1508 		RETURN_TRUE;
1509 	} else {
1510 		RETURN_FALSE;
1511 	}
1512 }
1513 /* }}} */
1514 
1515 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1516 PHP_METHOD(tidyNode, hasSiblings)
1517 {
1518 	TIDY_FETCH_ONLY_OBJECT;
1519 
1520 	if (obj->node && tidyGetNext(obj->node)) {
1521 		RETURN_TRUE;
1522 	} else {
1523 		RETURN_FALSE;
1524 	}
1525 }
1526 /* }}} */
1527 
1528 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1529 PHP_METHOD(tidyNode, isComment)
1530 {
1531 	TIDY_FETCH_ONLY_OBJECT;
1532 
1533 	if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1534 		RETURN_TRUE;
1535 	} else {
1536 		RETURN_FALSE;
1537 	}
1538 }
1539 /* }}} */
1540 
1541 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1542 PHP_METHOD(tidyNode, isHtml)
1543 {
1544 	TIDY_FETCH_ONLY_OBJECT;
1545 
1546 	switch (tidyNodeGetType(obj->node)) {
1547 		case TidyNode_Start:
1548 		case TidyNode_End:
1549 		case TidyNode_StartEnd:
1550 			RETURN_TRUE;
1551 		default:
1552 			RETURN_FALSE;
1553 	}
1554 }
1555 /* }}} */
1556 
1557 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1558 PHP_METHOD(tidyNode, isText)
1559 {
1560 	TIDY_FETCH_ONLY_OBJECT;
1561 
1562 	if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1563 		RETURN_TRUE;
1564 	} else {
1565 		RETURN_FALSE;
1566 	}
1567 }
1568 /* }}} */
1569 
1570 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1571 PHP_METHOD(tidyNode, isJste)
1572 {
1573 	TIDY_FETCH_ONLY_OBJECT;
1574 
1575 	if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1576 		RETURN_TRUE;
1577 	} else {
1578 		RETURN_FALSE;
1579 	}
1580 }
1581 /* }}} */
1582 
1583 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1584 PHP_METHOD(tidyNode, isAsp)
1585 {
1586 	TIDY_FETCH_ONLY_OBJECT;
1587 
1588 	if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1589 		RETURN_TRUE;
1590 	} else {
1591 		RETURN_FALSE;
1592 	}
1593 }
1594 /* }}} */
1595 
1596 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1597 PHP_METHOD(tidyNode, isPhp)
1598 {
1599 	TIDY_FETCH_ONLY_OBJECT;
1600 
1601 	if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1602 		RETURN_TRUE;
1603 	} else {
1604 		RETURN_FALSE;
1605 	}
1606 }
1607 /* }}} */
1608 
1609 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1610 PHP_METHOD(tidyNode, getParent)
1611 {
1612 	TidyNode	parent_node;
1613 	PHPTidyObj *newobj;
1614 	TIDY_FETCH_ONLY_OBJECT;
1615 
1616 	parent_node = tidyGetParent(obj->node);
1617 	if(parent_node) {
1618 		tidy_instantiate(tidy_ce_node, return_value);
1619 		newobj = Z_TIDY_P(return_value);
1620 		newobj->node = parent_node;
1621 		newobj->type = is_node;
1622 		newobj->ptdoc = obj->ptdoc;
1623 		newobj->ptdoc->ref_count++;
1624 		tidy_add_node_default_properties(newobj);
1625 	} else {
1626 		ZVAL_NULL(return_value);
1627 	}
1628 }
1629 /* }}} */
1630 
1631 
1632 /* {{{ __constructor for tidyNode. */
PHP_METHOD(tidyNode,__construct)1633 PHP_METHOD(tidyNode, __construct)
1634 {
1635 	zend_throw_error(NULL, "You should not create a tidyNode manually");
1636 }
1637 /* }}} */
1638 
1639 #endif
1640