xref: /php-src/ext/tidy/tidy.c (revision 2447cb25)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Author: John Coggeshall <john@php.net>                               |
14   +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include "php.h"
22 #include "php_tidy.h"
23 
24 #ifdef HAVE_TIDY
25 
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28 
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34 
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40 
41 #include "tidy_arginfo.h"
42 
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47 
/* {{{ ext/tidy macros */

/* Overwrite the final byte of a non-empty TidyBuffer with a NUL terminator. */
#define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)

/* Declare `object` and initialise it from getThis(). */
#define TIDY_SET_CONTEXT \
    zval *object = getThis();

/*
 * Declare `obj` and `object`, parse the call's arguments as a single
 * tidy_ce_doc object via zend_parse_method_parameters(), and resolve that
 * object to its PHPTidyObj. Leaves the calling function when parsing fails.
 */
#define TIDY_FETCH_OBJECT	\
	PHPTidyObj *obj;	\
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) == FAILURE) {	\
		RETURN_THROWS();	\
	}	\
	obj = Z_TIDY_P(object);

/*
 * Same as TIDY_FETCH_OBJECT, but additionally throws an Error and leaves the
 * calling function when the document's `initialized` flag is not set.
 */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		RETURN_THROWS(); \
	}

/*
 * Declare `obj` and `object` for a method that takes no arguments: `object`
 * comes from getThis() and the call is rejected if any argument was passed.
 */
#define TIDY_FETCH_ONLY_OBJECT	\
	PHPTidyObj *obj;	\
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() == FAILURE) {	\
		RETURN_THROWS();	\
	}	\
	obj = Z_TIDY_P(object);

/*
 * Make the calling function return false when php_check_open_basedir()
 * rejects `filename`.
 * NOTE: expands to a bare `if` statement, and returns without running any
 * cleanup the caller may still owe.
 */
#define TIDY_OPEN_BASE_DIR_CHECK(filename) \
if (php_check_open_basedir(filename)) { \
	RETURN_FALSE; \
}

/*
 * Apply user supplied configuration to `_doc`: an options array when
 * `_val_ht` is set, otherwise a configuration file named by `_val_str`
 * (subject to TIDY_OPEN_BASE_DIR_CHECK, which may return from the caller).
 */
#define TIDY_APPLY_CONFIG(_doc, _val_str, _val_ht) \
	if (_val_ht) { \
		_php_tidy_apply_config_array(_doc, _val_ht); \
	} else if (_val_str) { \
		TIDY_OPEN_BASE_DIR_CHECK(ZSTR_VAL(_val_str)); \
		php_tidy_load_config(_doc, ZSTR_VAL(_val_str)); \
	}

/* Load the file named by the tidy.default_config global when it is non-empty. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		php_tidy_load_config(_doc, TG(default_config)); \
	}
/* }}} */
95 
96 /* {{{ ext/tidy structs */
97 typedef struct _PHPTidyDoc PHPTidyDoc;
98 typedef struct _PHPTidyObj PHPTidyObj;
99 
100 typedef enum {
101 	is_node,
102 	is_doc
103 } tidy_obj_type;
104 
105 typedef enum {
106 	is_root_node,
107 	is_html_node,
108 	is_head_node,
109 	is_body_node
110 } tidy_base_nodetypes;
111 
112 struct _PHPTidyDoc {
113 	TidyDoc			doc;
114 	TidyBuffer		*errbuf;
115 	unsigned int	ref_count;
116 	unsigned int    initialized:1;
117 };
118 
119 struct _PHPTidyObj {
120 	TidyNode		node;
121 	tidy_obj_type	type;
122 	PHPTidyDoc		*ptdoc;
123 	zend_object		std;
124 };
125 
php_tidy_fetch_object(zend_object * obj)126 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
127 	return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
128 }
129 
130 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
131 /* }}} */
132 
133 /* {{{ ext/tidy prototypes */
134 static zend_string *php_tidy_file_to_mem(char *, bool);
135 static void tidy_object_free_storage(zend_object *);
136 static zend_object *tidy_object_new_node(zend_class_entry *);
137 static zend_object *tidy_object_new_doc(zend_class_entry *);
138 static zval *tidy_instantiate(zend_class_entry *, zval *);
139 static zend_result tidy_doc_cast_handler(zend_object *, zval *, int);
140 static zend_result tidy_node_cast_handler(zend_object *, zval *, int);
141 static void tidy_doc_update_properties(PHPTidyObj *);
142 static void tidy_add_node_default_properties(PHPTidyObj *);
143 static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
144 static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
145 static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
146 static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
147 static PHP_INI_MH(php_tidy_set_clean_output);
148 static void php_tidy_clean_output_start(const char *name, size_t name_len);
149 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
150 static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context);
151 
152 static PHP_MINIT_FUNCTION(tidy);
153 static PHP_MSHUTDOWN_FUNCTION(tidy);
154 static PHP_RINIT_FUNCTION(tidy);
155 static PHP_RSHUTDOWN_FUNCTION(tidy);
156 static PHP_MINFO_FUNCTION(tidy);
157 
158 ZEND_DECLARE_MODULE_GLOBALS(tidy)
159 
160 PHP_INI_BEGIN()
161 STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
162 STD_PHP_INI_BOOLEAN("tidy.clean_output",	"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
163 PHP_INI_END()
164 
165 static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
166 
167 static zend_object_handlers tidy_object_handlers_doc;
168 static zend_object_handlers tidy_object_handlers_node;
169 
170 zend_module_entry tidy_module_entry = {
171 	STANDARD_MODULE_HEADER,
172 	"tidy",
173 	ext_functions,
174 	PHP_MINIT(tidy),
175 	PHP_MSHUTDOWN(tidy),
176 	PHP_RINIT(tidy),
177 	PHP_RSHUTDOWN(tidy),
178 	PHP_MINFO(tidy),
179 	PHP_TIDY_VERSION,
180 	PHP_MODULE_GLOBALS(tidy),
181 	NULL,
182 	NULL,
183 	NULL,
184 	STANDARD_MODULE_PROPERTIES_EX
185 };
186 
187 #ifdef COMPILE_DL_TIDY
188 #ifdef ZTS
189 ZEND_TSRMLS_CACHE_DEFINE()
190 #endif
ZEND_GET_MODULE(tidy)191 ZEND_GET_MODULE(tidy)
192 #endif
193 
194 static void* TIDY_CALL php_tidy_malloc(size_t len)
195 {
196 	return emalloc(len);
197 }
198 
php_tidy_realloc(void * buf,size_t len)199 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
200 {
201 	return erealloc(buf, len);
202 }
203 
php_tidy_free(void * buf)204 static void TIDY_CALL php_tidy_free(void *buf)
205 {
206 	efree(buf);
207 }
208 
php_tidy_panic(ctmbstr msg)209 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
210 {
211 	php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
212 }
213 
/*
 * Load the tidy configuration file at `path` into `doc`.
 * A negative tidyLoadConfig() result is reported as a warning, a positive one
 * as a notice; zero is silent.
 */
static void php_tidy_load_config(TidyDoc doc, const char *path)
{
	const int status = tidyLoadConfig(doc, path);

	if (status == 0) {
		return;
	}

	if (status < 0) {
		php_error_docref(NULL, E_WARNING, "Could not load the Tidy configuration file \"%s\"", path);
	} else {
		php_error_docref(NULL, E_NOTICE, "There were errors while parsing the Tidy configuration file \"%s\"", path);
	}
}
223 
/*
 * Set the tidy option named `optname` on `doc` from the PHP value `value`.
 * The value is converted according to the option's declared type (string,
 * integer or boolean). Unknown options, read-only options and options of an
 * unrecognised type produce a warning.
 * Returns SUCCESS when libtidy accepted the value, FAILURE otherwise.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	TidyOption option = tidyGetOptionByName(doc, optname);

	if (!option) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *tmp;
			zend_string *text = zval_get_tmp_string(value, &tmp);
			bool accepted = tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text));

			/* The temporary string is released on both outcomes. */
			zend_tmp_string_release(tmp);
			return accepted ? SUCCESS : FAILURE;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			return tidyOptSetInt(doc, tidyOptGetId(option), number) ? SUCCESS : FAILURE;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			return tidyOptSetBool(doc, tidyOptGetId(option), flag) ? SUCCESS : FAILURE;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			return FAILURE;
	}
}
271 
/*
 * Shared implementation of tidy_repair_string() and tidy_repair_file().
 *
 * Parses the PHP arguments (input string or path, optional config array or
 * config file name, optional encoding and, for files, use_include_path),
 * runs parse + clean-and-repair on a throw-away TidyDoc and sets the return
 * value to the repaired markup, or to false on failure.
 *
 * Every exit after the document has been created goes through the `cleanup`
 * label so that the document, its error buffer and (for files) the loaded
 * contents are always released.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;

	if (is_file) {
		bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		/* `data` is a string owned by this function from here on. */
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		/* Borrowed from the argument; must not be released here. */
		data = arg1;
	}

	/* The length is later narrowed to uint32_t for tidyBufAttach(). */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (is_file) {
			/* Do not leak the file contents on the error path. */
			zend_string_release_ex(data, 0);
		}
		zend_argument_value_error(1, "is too long");
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/*
	 * Open-coded equivalent of TIDY_APPLY_CONFIG: the macro returns straight
	 * out of the function when open_basedir rejects the config file, which
	 * would skip the cleanup below.
	 */
	if (config_ht) {
		_php_tidy_apply_config_array(doc, config_ht);
	} else if (config_str) {
		if (php_check_open_basedir(ZSTR_VAL(config_str))) {
			RETVAL_FALSE;
			goto cleanup;
		}
		php_tidy_load_config(doc, ZSTR_VAL(config_str));
	}

	if (enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
			/*
			 * NOTE(review): processing continues after this warning, so the
			 * false set here is replaced by the parse result below. Kept
			 * as-is to preserve existing behaviour.
			 */
			RETVAL_FALSE;
		}
	}

	{
		TidyBuffer buf;

		/* Attach the input bytes to a TidyBuffer for parsing. */
		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else if (tidyCleanAndRepair(doc) >= 0) {
			TidyBuffer output;

			tidyBufInit(&output);
			tidySaveBuffer(doc, &output);
			FIX_BUFFER(&output);
			/* The last byte of a non-empty buffer is excluded from the result. */
			RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
			tidyBufFree(&output);
		} else {
			RETVAL_FALSE;
		}
	}

cleanup:
	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
368 
php_tidy_file_to_mem(char * filename,bool use_include_path)369 static zend_string *php_tidy_file_to_mem(char *filename, bool use_include_path)
370 {
371 	php_stream *stream;
372 	zend_string *data = NULL;
373 
374 	if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
375 		return NULL;
376 	}
377 	if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
378 		data = ZSTR_EMPTY_ALLOC();
379 	}
380 	php_stream_close(stream);
381 
382 	return data;
383 }
384 
/*
 * free_obj handler: destroys the standard object part and drops this object's
 * reference on the shared PHPTidyDoc, tearing the document down once the
 * count reaches zero.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *self = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&self->std);

	shared = self->ptdoc;
	if (!shared) {
		return;
	}

	shared->ref_count--;
	if (shared->ref_count > 0) {
		/* Other objects still refer to this document. */
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
402 
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)403 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
404 {
405 	PHPTidyObj *intern;
406 
407 	intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
408 	zend_object_std_init(&intern->std, class_type);
409 	object_properties_init(&intern->std, class_type);
410 
411 	switch(objtype) {
412 		case is_node:
413 			break;
414 
415 		case is_doc:
416 			intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
417 			intern->ptdoc->doc = tidyCreate();
418 			intern->ptdoc->ref_count = 1;
419 			intern->ptdoc->initialized = 0;
420 			intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
421 			tidyBufInit(intern->ptdoc->errbuf);
422 
423 			if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
424 				tidyBufFree(intern->ptdoc->errbuf);
425 				efree(intern->ptdoc->errbuf);
426 				tidyRelease(intern->ptdoc->doc);
427 				efree(intern->ptdoc);
428 				efree(intern);
429 				php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
430 			}
431 
432 			tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
433 			tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
434 
435 			TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
436 			break;
437 	}
438 
439 	intern->std.handlers = handlers;
440 
441 	return &intern->std;
442 }
443 
tidy_object_new_node(zend_class_entry * class_type)444 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
445 {
446 	return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
447 }
448 
tidy_object_new_doc(zend_class_entry * class_type)449 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
450 {
451 	return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
452 }
453 
/* Initialise `object` as a new instance of `pce` and hand the same zval back. */
static zval *tidy_instantiate(zend_class_entry *pce, zval *object)
{
	zval *target = object;

	object_init_ex(target, pce);
	return target;
}
459 
tidy_doc_cast_handler(zend_object * in,zval * out,int type)460 static zend_result tidy_doc_cast_handler(zend_object *in, zval *out, int type)
461 {
462 	TidyBuffer output;
463 	PHPTidyObj *obj;
464 
465 	switch (type) {
466 		case IS_LONG:
467 		case _IS_NUMBER:
468 			ZVAL_LONG(out, 0);
469 			break;
470 
471 		case IS_DOUBLE:
472 			ZVAL_DOUBLE(out, 0);
473 			break;
474 
475 		case _IS_BOOL:
476 			ZVAL_TRUE(out);
477 			break;
478 
479 		case IS_STRING:
480 			obj = php_tidy_fetch_object(in);
481 			tidyBufInit(&output);
482 			tidySaveBuffer (obj->ptdoc->doc, &output);
483 			if (output.size) {
484 				ZVAL_STRINGL(out, (char *) output.bp, output.size-1);
485 			} else {
486 				ZVAL_EMPTY_STRING(out);
487 			}
488 			tidyBufFree(&output);
489 			break;
490 
491 		default:
492 			return FAILURE;
493 	}
494 
495 	return SUCCESS;
496 }
497 
tidy_node_cast_handler(zend_object * in,zval * out,int type)498 static zend_result tidy_node_cast_handler(zend_object *in, zval *out, int type)
499 {
500 	TidyBuffer buf;
501 	PHPTidyObj *obj;
502 
503 	switch(type) {
504 		case IS_LONG:
505 		case _IS_NUMBER:
506 			ZVAL_LONG(out, 0);
507 			break;
508 
509 		case IS_DOUBLE:
510 			ZVAL_DOUBLE(out, 0);
511 			break;
512 
513 		case _IS_BOOL:
514 			ZVAL_TRUE(out);
515 			break;
516 
517 		case IS_STRING:
518 			obj = php_tidy_fetch_object(in);
519 			tidyBufInit(&buf);
520 			if (obj->ptdoc) {
521 				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
522 				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
523 			} else {
524 				ZVAL_EMPTY_STRING(out);
525 			}
526 			tidyBufFree(&buf);
527 			break;
528 
529 		default:
530 			return FAILURE;
531 	}
532 
533 	return SUCCESS;
534 }
535 
/*
 * Refresh the "value" and "errorBuffer" properties of a document object from
 * the current libtidy state. A property is only written when the
 * corresponding buffer is non-empty.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer *errors;
	TidyBuffer rendered;

	tidyBufInit(&rendered);
	tidySaveBuffer(obj->ptdoc->doc, &rendered);
	if (rendered.size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, "value", sizeof("value") - 1,
			(char*) rendered.bp, rendered.size-1);
	}
	tidyBufFree(&rendered);

	errors = obj->ptdoc->errbuf;
	if (errors->size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, "errorBuffer", sizeof("errorBuffer") - 1,
			(char*) errors->bp, errors->size-1);
	}
}
567 
/*
 * Populate the properties of a node object from its TidyNode: value, name,
 * type, line, column, proprietary, id, attribute and child.
 *
 * "attribute" and "child" are null when the node has no attributes or no
 * children. Each child is created as its own node object sharing this node's
 * document (the document's ref_count is incremented per child) and is
 * populated recursively.
 */
static void tidy_add_node_default_properties(PHPTidyObj *obj)
{
	TidyBuffer text;
	TidyAttr attr;
	TidyNode child_node;
	zval attributes, child_list, child_zv;
	char *node_name;

	/* value: node text, without the buffer's last byte */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
	if (text.size) {
		zend_update_property_stringl(tidy_ce_node, &obj->std, "value", sizeof("value") - 1,
			(char *) text.bp, text.size - 1);
	} else {
		zend_update_property_stringl(tidy_ce_node, &obj->std, "value", sizeof("value") - 1, "", 0);
	}
	tidyBufFree(&text);

	/* name: empty string when libtidy reports none */
	node_name = (char *) tidyNodeGetName(obj->node);
	if (!node_name) {
		node_name = "";
	}
	zend_update_property_string(tidy_ce_node, &obj->std, "name", sizeof("name") - 1, node_name);

	zend_update_property_long(tidy_ce_node, &obj->std, "type", sizeof("type") - 1,
		tidyNodeGetType(obj->node));
	zend_update_property_long(tidy_ce_node, &obj->std, "line", sizeof("line") - 1,
		tidyNodeLine(obj->node));
	zend_update_property_long(tidy_ce_node, &obj->std, "column", sizeof("column") - 1,
		tidyNodeColumn(obj->node));
	zend_update_property_bool(tidy_ce_node, &obj->std, "proprietary", sizeof("proprietary") - 1,
		tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	/* id: null for root, doctype, text and comment nodes */
	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			zend_update_property_null(tidy_ce_node, &obj->std, "id", sizeof("id") - 1);
			break;

		default:
			zend_update_property_long(tidy_ce_node, &obj->std, "id", sizeof("id") - 1,
				tidyNodeGetId(obj->node));
	}

	/* attribute: name => value map, or null when there are no attributes */
	attr = tidyAttrFirst(obj->node);
	if (!attr) {
		ZVAL_NULL(&attributes);
	} else {
		array_init(&attributes);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *)tidyAttrName(attr);
			char *attr_value = (char *)tidyAttrValue(attr);

			if (!attr_name) {
				continue;
			}
			if (attr_value) {
				add_assoc_string(&attributes, attr_name, attr_value);
			} else {
				add_assoc_str(&attributes, attr_name, zend_empty_string);
			}
		}
	}

	zend_update_property(tidy_ce_node, &obj->std, "attribute", sizeof("attribute") - 1, &attributes);
	zval_ptr_dtor(&attributes);

	/* child: list of node objects, or null when there are no children */
	child_node = tidyGetChild(obj->node);
	if (!child_node) {
		ZVAL_NULL(&child_list);
	} else {
		array_init(&child_list);

		for (; child_node; child_node = tidyGetNext(child_node)) {
			PHPTidyObj *child;

			tidy_instantiate(tidy_ce_node, &child_zv);
			child = Z_TIDY_P(&child_zv);
			child->node = child_node;
			child->type = is_node;
			child->ptdoc = obj->ptdoc;
			child->ptdoc->ref_count++;

			tidy_add_node_default_properties(child);
			add_next_index_zval(&child_list, &child_zv);
		}
	}

	zend_update_property(tidy_ce_node, &obj->std, "child", sizeof("child") - 1, &child_list);
	zval_ptr_dtor(&child_list);
}
718 
/*
 * Fetch the current value of `opt` and store its type in `*type`.
 * String options are returned as a newly created zend_string (empty when
 * libtidy has no value); integer and boolean options are returned packed
 * into the pointer value itself. Returns NULL for any other option type.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	TidyOptionType kind = tidyOptGetType(opt);

	*type = kind;

	if (kind == TidyString) {
		char *text = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));

		if (!text) {
			return (void *) ZSTR_EMPTY_ALLOC();
		}
		return (void *) zend_string_init(text, strlen(text), 0);
	}

	if (kind == TidyInteger) {
		return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
	}

	if (kind == TidyBoolean) {
		return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
746 
/*
 * Shared implementation for the root/html/head/body node accessors.
 * Looks up the requested node in the document and returns a populated node
 * object that shares the document (taking a reference on it), or null when
 * the document has no such node.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *created;
	TidyNode found;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_root_node:
			found = tidyGetRoot(obj->ptdoc->doc);
			break;

		case is_html_node:
			found = tidyGetHtml(obj->ptdoc->doc);
			break;

		case is_head_node:
			found = tidyGetHead(obj->ptdoc->doc);
			break;

		case is_body_node:
			found = tidyGetBody(obj->ptdoc->doc);
			break;

		EMPTY_SWITCH_DEFAULT_CASE()
	}

	if (!found) {
		RETURN_NULL();
	}

	tidy_instantiate(tidy_ce_node, return_value);
	created = Z_TIDY_P(return_value);
	created->node  = found;
	created->type  = is_node;
	created->ptdoc = obj->ptdoc;
	created->ptdoc->ref_count++;

	tidy_add_node_default_properties(created);
}
786 
/*
 * Apply every string-keyed entry of `ht_options` to `doc` as a tidy option.
 * Packed arrays and entries without a string key are ignored, and failures
 * of individual options do not change the result: SUCCESS is always returned.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zend_string *key;
	zval *entry;

	if (HT_IS_PACKED(ht_options)) {
		return SUCCESS;
	}

	ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(ht_options, key, entry) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), entry);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
802 
/*
 * Parse `len` bytes at `string` into the document owned by `obj`, optionally
 * switching to character encoding `enc` first, and refresh the object's
 * properties on success.
 * The document is flagged as initialized before parsing is attempted.
 * Returns SUCCESS, or FAILURE (after a warning) when the encoding is rejected
 * or parsing fails.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	PHPTidyDoc *ptdoc = obj->ptdoc;
	TidyBuffer input;

	if (enc && tidySetCharEncoding(ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	ptdoc->initialized = 1;

	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(ptdoc->doc, &input) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", ptdoc->errbuf->bp);
		return FAILURE;
	}

	tidy_doc_update_properties(obj);
	return SUCCESS;
}
826 
PHP_MINIT_FUNCTION(tidy)827 static PHP_MINIT_FUNCTION(tidy)
828 {
829 	tidySetMallocCall(php_tidy_malloc);
830 	tidySetReallocCall(php_tidy_realloc);
831 	tidySetFreeCall(php_tidy_free);
832 	tidySetPanicCall(php_tidy_panic);
833 
834 	REGISTER_INI_ENTRIES();
835 
836 	tidy_ce_doc = register_class_tidy();
837 	tidy_ce_doc->create_object = tidy_object_new_doc;
838 	memcpy(&tidy_object_handlers_doc, &std_object_handlers, sizeof(zend_object_handlers));
839 	tidy_object_handlers_doc.clone_obj = NULL;
840 
841 	tidy_ce_node = register_class_tidyNode();
842 	tidy_ce_node->create_object = tidy_object_new_node;
843 	memcpy(&tidy_object_handlers_node, &std_object_handlers, sizeof(zend_object_handlers));
844 	tidy_object_handlers_node.clone_obj = NULL;
845 
846 	tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
847 	tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
848 
849 	tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
850 	tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
851 
852 	register_tidy_symbols(module_number);
853 
854 	php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
855 
856 	return SUCCESS;
857 }
858 
PHP_RINIT_FUNCTION(tidy)859 static PHP_RINIT_FUNCTION(tidy)
860 {
861 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
862 	ZEND_TSRMLS_CACHE_UPDATE();
863 #endif
864 
865 	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
866 
867 	return SUCCESS;
868 }
869 
PHP_RSHUTDOWN_FUNCTION(tidy)870 static PHP_RSHUTDOWN_FUNCTION(tidy)
871 {
872 	TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
873 
874 	return SUCCESS;
875 }
876 
PHP_MSHUTDOWN_FUNCTION(tidy)877 static PHP_MSHUTDOWN_FUNCTION(tidy)
878 {
879 	UNREGISTER_INI_ENTRIES();
880 	return SUCCESS;
881 }
882 
/*
 * Module info: prints the support row, the library version and release date
 * rows that the build configuration provides, then the INI entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");

#ifdef HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
#elif defined(HAVE_TIDYP_H)
	php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
#endif

#ifdef HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
899 
PHP_INI_MH(php_tidy_set_clean_output)900 static PHP_INI_MH(php_tidy_set_clean_output)
901 {
902 	int status;
903 	bool value;
904 
905 	value = zend_ini_parse_bool(new_value);
906 
907 	if (stage == PHP_INI_STAGE_RUNTIME) {
908 		status = php_output_get_status();
909 
910 		if (value && (status & PHP_OUTPUT_WRITTEN)) {
911 			php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
912 			return FAILURE;
913 		}
914 		if (status & PHP_OUTPUT_SENT) {
915 			php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
916 			return FAILURE;
917 		}
918 	}
919 
920 	status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
921 
922 	if (stage == PHP_INI_STAGE_RUNTIME && value) {
923 		if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
924 			php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
925 		}
926 	}
927 
928 	return status;
929 }
930 
931 /*
932  * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
933  */
934 
/*
 * Start the tidy output handler under `name`, but only when the clean_output
 * global is set and the handler could be created.
 */
static void php_tidy_clean_output_start(const char *name, size_t name_len)
{
	php_output_handler *handler;

	if (!TG(clean_output)) {
		return;
	}

	handler = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
	if (handler) {
		php_output_handler_start(handler);
	}
}
943 
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)944 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
945 {
946 	if (chunk_size) {
947 		php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
948 		return NULL;
949 	}
950 	if (!TG(clean_output)) {
951 		TG(clean_output) = 1;
952 	}
953 	return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
954 }
955 
php_tidy_output_handler(void ** nothing,php_output_context * output_context)956 static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context)
957 {
958 	zend_result status = FAILURE;
959 	TidyDoc doc;
960 	TidyBuffer inbuf, outbuf, errbuf;
961 
962 	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
963 		doc = tidyCreate();
964 		tidyBufInit(&errbuf);
965 
966 		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
967 			tidyOptSetBool(doc, TidyForceOutput, yes);
968 			tidyOptSetBool(doc, TidyMark, no);
969 
970 			if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
971 				php_error_docref(NULL, E_WARNING, "Input string is too long");
972 				return status;
973 			}
974 
975 			TIDY_SET_DEFAULT_CONFIG(doc);
976 
977 			tidyBufInit(&inbuf);
978 			tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);
979 
980 			if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
981 				tidyBufInit(&outbuf);
982 				tidySaveBuffer(doc, &outbuf);
983 				FIX_BUFFER(&outbuf);
984 				output_context->out.data = (char *) outbuf.bp;
985 				output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
986 				output_context->out.free = 1;
987 				status = SUCCESS;
988 			}
989 		}
990 
991 		tidyRelease(doc);
992 		tidyBufFree(&errbuf);
993 	}
994 
995 	return status;
996 }
997 
998 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)999 PHP_FUNCTION(tidy_parse_string)
1000 {
1001 	char *enc = NULL;
1002 	size_t enc_len = 0;
1003 	zend_string *input, *options_str = NULL;
1004 	HashTable *options_ht = NULL;
1005 	PHPTidyObj *obj;
1006 
1007 	ZEND_PARSE_PARAMETERS_START(1, 3)
1008 		Z_PARAM_STR(input)
1009 		Z_PARAM_OPTIONAL
1010 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1011 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1012 	ZEND_PARSE_PARAMETERS_END();
1013 
1014 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1015 		zend_argument_value_error(1, "is too long");
1016 		RETURN_THROWS();
1017 	}
1018 
1019 	tidy_instantiate(tidy_ce_doc, return_value);
1020 	obj = Z_TIDY_P(return_value);
1021 
1022 	TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1023 
1024 	if (php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1025 		zval_ptr_dtor(return_value);
1026 		RETURN_FALSE;
1027 	}
1028 }
1029 /* }}} */
1030 
1031 /* {{{ Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1032 PHP_FUNCTION(tidy_get_error_buffer)
1033 {
1034 	TIDY_FETCH_OBJECT;
1035 
1036 	if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1037 		RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1038 	} else {
1039 		RETURN_FALSE;
1040 	}
1041 }
1042 /* }}} */
1043 
1044 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1045 PHP_FUNCTION(tidy_get_output)
1046 {
1047 	TidyBuffer output;
1048 	TIDY_FETCH_OBJECT;
1049 
1050 	tidyBufInit(&output);
1051 	tidySaveBuffer(obj->ptdoc->doc, &output);
1052 	FIX_BUFFER(&output);
1053 	RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1054 	tidyBufFree(&output);
1055 }
1056 /* }}} */
1057 
1058 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1059 PHP_FUNCTION(tidy_parse_file)
1060 {
1061 	char *enc = NULL;
1062 	size_t enc_len = 0;
1063 	bool use_include_path = 0;
1064 	zend_string *inputfile, *contents, *options_str = NULL;
1065 	HashTable *options_ht = NULL;
1066 
1067 	PHPTidyObj *obj;
1068 
1069 	ZEND_PARSE_PARAMETERS_START(1, 4)
1070 		Z_PARAM_PATH_STR(inputfile)
1071 		Z_PARAM_OPTIONAL
1072 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1073 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1074 		Z_PARAM_BOOL(use_include_path)
1075 	ZEND_PARSE_PARAMETERS_END();
1076 
1077 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1078 		php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1079 		RETURN_FALSE;
1080 	}
1081 
1082 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1083 		zend_string_release_ex(contents, 0);
1084 		zend_value_error("Input string is too long");
1085 		RETURN_THROWS();
1086 	}
1087 
1088 	tidy_instantiate(tidy_ce_doc, return_value);
1089 	obj = Z_TIDY_P(return_value);
1090 
1091 	TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1092 
1093 	if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1094 		zval_ptr_dtor(return_value);
1095 		RETVAL_FALSE;
1096 	}
1097 
1098 	zend_string_release_ex(contents, 0);
1099 }
1100 /* }}} */
1101 
1102 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1103 PHP_FUNCTION(tidy_clean_repair)
1104 {
1105 	TIDY_FETCH_OBJECT;
1106 
1107 	if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1108 		tidy_doc_update_properties(obj);
1109 		RETURN_TRUE;
1110 	}
1111 
1112 	RETURN_FALSE;
1113 }
1114 /* }}} */
1115 
/* {{{ Repair a string using an optionally provided configuration file.
 *
 * Thin wrapper: forwards the call unchanged to php_tidy_quick_repair() with
 * its trailing flag set to false (tidy_repair_file passes true). */
PHP_FUNCTION(tidy_repair_string)
{
	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
1122 
/* {{{ Repair a file using an optionally provided configuration file.
 *
 * Thin wrapper: forwards the call unchanged to php_tidy_quick_repair() with
 * its trailing flag set to true (tidy_repair_string passes false). */
PHP_FUNCTION(tidy_repair_file)
{
	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
1129 
1130 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1131 PHP_FUNCTION(tidy_diagnose)
1132 {
1133 	TIDY_FETCH_OBJECT;
1134 
1135 	if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1136 		tidy_doc_update_properties(obj);
1137 		RETURN_TRUE;
1138 	}
1139 
1140 	RETURN_FALSE;
1141 }
1142 /* }}} */
1143 
1144 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1145 PHP_FUNCTION(tidy_get_release)
1146 {
1147 	if (zend_parse_parameters_none() == FAILURE) {
1148 		RETURN_THROWS();
1149 	}
1150 
1151 #ifdef HAVE_TIDYRELEASEDATE
1152 	RETURN_STRING((char *)tidyReleaseDate());
1153 #else
1154 	RETURN_STRING((char *)"unknown");
1155 #endif
1156 }
1157 /* }}} */
1158 
1159 
1160 #ifdef HAVE_TIDYOPTGETDOC
1161 /* {{{ Returns the documentation for the given option name */
PHP_FUNCTION(tidy_get_opt_doc)1162 PHP_FUNCTION(tidy_get_opt_doc)
1163 {
1164 	PHPTidyObj *obj;
1165 	char *optval, *optname;
1166 	size_t optname_len;
1167 	TidyOption opt;
1168 	zval *object;
1169 
1170 	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1171 		RETURN_THROWS();
1172 	}
1173 
1174 	obj = Z_TIDY_P(object);
1175 
1176 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1177 
1178 	if (!opt) {
1179 		zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1180 		RETURN_THROWS();
1181 	}
1182 
1183 	if ( (optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt)) ) {
1184 		RETURN_STRING(optval);
1185 	}
1186 
1187 	RETURN_FALSE;
1188 }
1189 /* }}} */
1190 #endif
1191 
1192 
1193 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1194 PHP_FUNCTION(tidy_get_config)
1195 {
1196 	TidyIterator itOpt;
1197 	char *opt_name;
1198 	void *opt_value;
1199 	TidyOptionType optt;
1200 
1201 	TIDY_FETCH_OBJECT;
1202 
1203 	itOpt = tidyGetOptionList(obj->ptdoc->doc);
1204 
1205 	array_init(return_value);
1206 
1207 	while (itOpt) {
1208 		TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1209 
1210 		opt_name = (char *)tidyOptGetName(opt);
1211 		opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1212 		switch (optt) {
1213 			case TidyString:
1214 				add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1215 				break;
1216 
1217 			case TidyInteger:
1218 				add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1219 				break;
1220 
1221 			case TidyBoolean:
1222 				add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1223 				break;
1224 		}
1225 	}
1226 
1227 	return;
1228 }
1229 /* }}} */
1230 
1231 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1232 PHP_FUNCTION(tidy_get_status)
1233 {
1234 	TIDY_FETCH_OBJECT;
1235 
1236 	RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1237 }
1238 /* }}} */
1239 
1240 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1241 PHP_FUNCTION(tidy_get_html_ver)
1242 {
1243 	TIDY_FETCH_INITIALIZED_OBJECT;
1244 
1245 	RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1246 }
1247 /* }}} */
1248 
1249 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1250 PHP_FUNCTION(tidy_is_xhtml)
1251 {
1252 	TIDY_FETCH_INITIALIZED_OBJECT;
1253 
1254 	RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1255 }
1256 /* }}} */
1257 
1258 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1259 PHP_FUNCTION(tidy_is_xml)
1260 {
1261 	TIDY_FETCH_INITIALIZED_OBJECT;
1262 
1263 	RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1264 }
1265 /* }}} */
1266 
1267 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1268 PHP_FUNCTION(tidy_error_count)
1269 {
1270 	TIDY_FETCH_OBJECT;
1271 
1272 	RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1273 }
1274 /* }}} */
1275 
1276 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1277 PHP_FUNCTION(tidy_warning_count)
1278 {
1279 	TIDY_FETCH_OBJECT;
1280 
1281 	RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1282 }
1283 /* }}} */
1284 
1285 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1286 PHP_FUNCTION(tidy_access_count)
1287 {
1288 	TIDY_FETCH_OBJECT;
1289 
1290 	RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1291 }
1292 /* }}} */
1293 
1294 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1295 PHP_FUNCTION(tidy_config_count)
1296 {
1297 	TIDY_FETCH_OBJECT;
1298 
1299 	RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1300 }
1301 /* }}} */
1302 
1303 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1304 PHP_FUNCTION(tidy_getopt)
1305 {
1306 	PHPTidyObj *obj;
1307 	char *optname;
1308 	void *optval;
1309 	size_t optname_len;
1310 	TidyOption opt;
1311 	TidyOptionType optt;
1312 	zval *object;
1313 
1314 	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1315 		RETURN_THROWS();
1316 	}
1317 
1318 	obj = Z_TIDY_P(object);
1319 
1320 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1321 
1322 	if (!opt) {
1323 		zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1324 		RETURN_THROWS();
1325 	}
1326 
1327 	optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1328 	switch (optt) {
1329 		case TidyString:
1330 			RETVAL_STR((zend_string*)optval);
1331 			return;
1332 
1333 		case TidyInteger:
1334 			RETURN_LONG((zend_long)optval);
1335 			break;
1336 
1337 		case TidyBoolean:
1338 			if (optval) {
1339 				RETURN_TRUE;
1340 			} else {
1341 				RETURN_FALSE;
1342 			}
1343 			break;
1344 
1345 		default:
1346 			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1347 			break;
1348 	}
1349 
1350 	RETURN_FALSE;
1351 }
1352 /* }}} */
1353 
PHP_METHOD(tidy,__construct)1354 PHP_METHOD(tidy, __construct)
1355 {
1356 	char *enc = NULL;
1357 	size_t enc_len = 0;
1358 	bool use_include_path = 0;
1359 	HashTable *options_ht = NULL;
1360 	zend_string *contents, *inputfile = NULL, *options_str = NULL;
1361 	PHPTidyObj *obj;
1362 
1363 	ZEND_PARSE_PARAMETERS_START(0, 4)
1364 		Z_PARAM_OPTIONAL
1365 		Z_PARAM_PATH_STR_OR_NULL(inputfile)
1366 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1367 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1368 		Z_PARAM_BOOL(use_include_path)
1369 	ZEND_PARSE_PARAMETERS_END();
1370 
1371 	TIDY_SET_CONTEXT;
1372 	obj = Z_TIDY_P(object);
1373 
1374 	if (inputfile) {
1375 		if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1376 			php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1377 			return;
1378 		}
1379 
1380 		if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1381 			zend_string_release_ex(contents, 0);
1382 			zend_value_error("Input string is too long");
1383 			RETURN_THROWS();
1384 		}
1385 
1386 		TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1387 
1388 		php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1389 
1390 		zend_string_release_ex(contents, 0);
1391 	}
1392 }
1393 
PHP_METHOD(tidy,parseFile)1394 PHP_METHOD(tidy, parseFile)
1395 {
1396 	char *enc = NULL;
1397 	size_t enc_len = 0;
1398 	bool use_include_path = 0;
1399 	HashTable *options_ht = NULL;
1400 	zend_string *inputfile, *contents, *options_str = NULL;
1401 	PHPTidyObj *obj;
1402 
1403 	ZEND_PARSE_PARAMETERS_START(1, 4)
1404 		Z_PARAM_PATH_STR(inputfile)
1405 		Z_PARAM_OPTIONAL
1406 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1407 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1408 		Z_PARAM_BOOL(use_include_path)
1409 	ZEND_PARSE_PARAMETERS_END();
1410 
1411 	TIDY_SET_CONTEXT;
1412 	obj = Z_TIDY_P(object);
1413 
1414 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1415 		php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1416 		RETURN_FALSE;
1417 	}
1418 
1419 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1420 		zend_string_release_ex(contents, 0);
1421 		zend_value_error("Input string is too long");
1422 		RETURN_THROWS();
1423 	}
1424 
1425 	TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1426 
1427 	if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1428 		RETVAL_FALSE;
1429 	} else {
1430 		RETVAL_TRUE;
1431 	}
1432 
1433 	zend_string_release_ex(contents, 0);
1434 }
1435 
PHP_METHOD(tidy,parseString)1436 PHP_METHOD(tidy, parseString)
1437 {
1438 	char *enc = NULL;
1439 	size_t enc_len = 0;
1440 	HashTable *options_ht = NULL;
1441 	PHPTidyObj *obj;
1442 	zend_string *input, *options_str = NULL;
1443 
1444 	ZEND_PARSE_PARAMETERS_START(1, 3)
1445 		Z_PARAM_STR(input)
1446 		Z_PARAM_OPTIONAL
1447 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1448 		Z_PARAM_STRING_OR_NULL(enc, enc_len)
1449 	ZEND_PARSE_PARAMETERS_END();
1450 
1451 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1452 		zend_argument_value_error(1, "is too long");
1453 		RETURN_THROWS();
1454 	}
1455 
1456 	TIDY_SET_CONTEXT;
1457 	obj = Z_TIDY_P(object);
1458 
1459 	TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1460 
1461 	if(php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1462 		RETURN_TRUE;
1463 	}
1464 
1465 	RETURN_FALSE;
1466 }
1467 
1468 
/* {{{ Returns a TidyNode Object representing the root of the tidy parse tree.
 *
 * Thin wrapper: forwards the call unchanged to php_tidy_create_node(),
 * selecting is_root_node. */
PHP_FUNCTION(tidy_get_root)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
}
/* }}} */
1475 
/* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree.
 *
 * Thin wrapper: forwards the call unchanged to php_tidy_create_node(),
 * selecting is_html_node. */
PHP_FUNCTION(tidy_get_html)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
}
/* }}} */
1482 
/* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree.
 *
 * Thin wrapper: forwards the call unchanged to php_tidy_create_node(),
 * selecting is_head_node. */
PHP_FUNCTION(tidy_get_head)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
}
/* }}} */
1489 
/* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree.
 *
 * Thin wrapper: forwards the call unchanged to php_tidy_create_node(),
 * selecting is_body_node. */
PHP_FUNCTION(tidy_get_body)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
}
/* }}} */
1496 
1497 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1498 PHP_METHOD(tidyNode, hasChildren)
1499 {
1500 	TIDY_FETCH_ONLY_OBJECT;
1501 
1502 	if (tidyGetChild(obj->node)) {
1503 		RETURN_TRUE;
1504 	} else {
1505 		RETURN_FALSE;
1506 	}
1507 }
1508 /* }}} */
1509 
1510 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1511 PHP_METHOD(tidyNode, hasSiblings)
1512 {
1513 	TIDY_FETCH_ONLY_OBJECT;
1514 
1515 	if (obj->node && tidyGetNext(obj->node)) {
1516 		RETURN_TRUE;
1517 	} else {
1518 		RETURN_FALSE;
1519 	}
1520 }
1521 /* }}} */
1522 
1523 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1524 PHP_METHOD(tidyNode, isComment)
1525 {
1526 	TIDY_FETCH_ONLY_OBJECT;
1527 
1528 	if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1529 		RETURN_TRUE;
1530 	} else {
1531 		RETURN_FALSE;
1532 	}
1533 }
1534 /* }}} */
1535 
1536 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1537 PHP_METHOD(tidyNode, isHtml)
1538 {
1539 	TIDY_FETCH_ONLY_OBJECT;
1540 
1541 	switch (tidyNodeGetType(obj->node)) {
1542 		case TidyNode_Start:
1543 		case TidyNode_End:
1544 		case TidyNode_StartEnd:
1545 			RETURN_TRUE;
1546 		default:
1547 			RETURN_FALSE;
1548 	}
1549 }
1550 /* }}} */
1551 
1552 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1553 PHP_METHOD(tidyNode, isText)
1554 {
1555 	TIDY_FETCH_ONLY_OBJECT;
1556 
1557 	if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1558 		RETURN_TRUE;
1559 	} else {
1560 		RETURN_FALSE;
1561 	}
1562 }
1563 /* }}} */
1564 
1565 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1566 PHP_METHOD(tidyNode, isJste)
1567 {
1568 	TIDY_FETCH_ONLY_OBJECT;
1569 
1570 	if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1571 		RETURN_TRUE;
1572 	} else {
1573 		RETURN_FALSE;
1574 	}
1575 }
1576 /* }}} */
1577 
1578 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1579 PHP_METHOD(tidyNode, isAsp)
1580 {
1581 	TIDY_FETCH_ONLY_OBJECT;
1582 
1583 	if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1584 		RETURN_TRUE;
1585 	} else {
1586 		RETURN_FALSE;
1587 	}
1588 }
1589 /* }}} */
1590 
1591 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1592 PHP_METHOD(tidyNode, isPhp)
1593 {
1594 	TIDY_FETCH_ONLY_OBJECT;
1595 
1596 	if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1597 		RETURN_TRUE;
1598 	} else {
1599 		RETURN_FALSE;
1600 	}
1601 }
1602 /* }}} */
1603 
1604 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1605 PHP_METHOD(tidyNode, getParent)
1606 {
1607 	TidyNode	parent_node;
1608 	PHPTidyObj *newobj;
1609 	TIDY_FETCH_ONLY_OBJECT;
1610 
1611 	parent_node = tidyGetParent(obj->node);
1612 	if(parent_node) {
1613 		tidy_instantiate(tidy_ce_node, return_value);
1614 		newobj = Z_TIDY_P(return_value);
1615 		newobj->node = parent_node;
1616 		newobj->type = is_node;
1617 		newobj->ptdoc = obj->ptdoc;
1618 		newobj->ptdoc->ref_count++;
1619 		tidy_add_node_default_properties(newobj);
1620 	} else {
1621 		ZVAL_NULL(return_value);
1622 	}
1623 }
1624 /* }}} */
1625 
1626 
/* {{{ Constructor for tidyNode.
 *
 * Always throws an Error: tidyNode objects are not meant to be created
 * manually, as the message below states. */
PHP_METHOD(tidyNode, __construct)
{
	zend_throw_error(NULL, "You should not create a tidyNode manually");
}
/* }}} */
1633 
1634 #endif
1635