xref: /PHP-7.4/ext/tidy/tidy.c (revision 221345a0)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) The PHP Group                                          |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: John Coggeshall <john@php.net>                               |
16   +----------------------------------------------------------------------+
17 */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "php.h"
24 #include "php_tidy.h"
25 
26 #if HAVE_TIDY
27 
28 #include "php_ini.h"
29 #include "ext/standard/info.h"
30 
31 #if HAVE_TIDY_H
32 #include "tidy.h"
33 #elif HAVE_TIDYP_H
34 #include "tidyp.h"
35 #endif
36 
37 #if HAVE_TIDYBUFFIO_H
38 #include "tidybuffio.h"
39 #else
40 #include "buffio.h"
41 #endif
42 
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47 
48 /* {{{ ext/tidy macros
49 */
/* Overwrite the last byte of a non-empty buffer with a NUL terminator;
 * an empty buffer (size == 0) is left untouched. */
#define FIX_BUFFER(bptr) \
	do { \
		if ((bptr)->size) { \
			(bptr)->bp[(bptr)->size - 1] = '\0'; \
		} \
	} while (0)
51 
/* Declares `object` in the expanding function and binds it to getThis(). */
#define TIDY_SET_CONTEXT zval *object = getThis();
54 
/* Declares `obj` and `object` and resolves the tidy object for the current call.
 * With a bound `this`, no arguments are accepted (plain return on failure);
 * otherwise the object is parsed as the single "O" argument of class
 * tidy_ce_doc (RETURN_FALSE on failure). */
#define TIDY_FETCH_OBJECT \
	PHPTidyObj *obj; \
	TIDY_SET_CONTEXT; \
	if (!object) { \
		if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "O", &object, tidy_ce_doc) == FAILURE) { \
			RETURN_FALSE; \
		} \
	} else if (zend_parse_parameters_none() == FAILURE) { \
		return; \
	} \
	obj = Z_TIDY_P(object);
68 
/* TIDY_FETCH_OBJECT followed by a guard: when the document's `initialized`
 * flag is clear, an Error is thrown and the expanding function returns. */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (obj->ptdoc->initialized == 0) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}
75 
/* Method-only variant: declares `obj`/`object`, rejects any arguments and
 * takes the tidy object from getThis() without a NULL check. */
#define TIDY_FETCH_ONLY_OBJECT \
	PHPTidyObj *obj; \
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() == FAILURE) { \
		return; \
	} \
	obj = Z_TIDY_P(object);
83 
/* Apply a user-supplied configuration zval to a TidyDoc.
 * An array is handed to _php_tidy_apply_config_array(); anything else is
 * converted to a string and loaded as a configuration file path.
 * NOTE: TIDY_OPEN_BASE_DIR_CHECK makes the *expanding function* return FALSE
 * when the path is rejected, so callers must not hold unreleased resources. */
#define TIDY_APPLY_CONFIG_ZVAL(_doc, _val) \
	if (_val) { \
		if (Z_TYPE_P(_val) != IS_ARRAY) { \
			convert_to_string_ex(_val); \
			TIDY_OPEN_BASE_DIR_CHECK(Z_STRVAL_P(_val)); \
			switch (tidyLoadConfig(_doc, Z_STRVAL_P(_val))) { \
				case 1: \
					php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_P(_val)); \
					break; \
				case -1: \
					php_error_docref(NULL, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_P(_val)); \
					break; \
			} \
		} else { \
			_php_tidy_apply_config_array(_doc, Z_ARRVAL_P(_val)); \
		} \
	}
101 
/* Register an internal class `classname` with method table tidy_funcs_<name>
 * and constructor hook tidy_object_new_<name>, store the class entry in
 * tidy_ce_<name>, OR in `__flags`, and seed tidy_object_handlers_<name> from
 * the standard handlers with cloning disabled. */
#define REGISTER_TIDY_CLASS(classname, name, parent, __flags) \
	{ \
		zend_class_entry tidy_class_tmpl; \
		INIT_CLASS_ENTRY(tidy_class_tmpl, # classname, tidy_funcs_ ## name); \
		tidy_class_tmpl.create_object = tidy_object_new_ ## name; \
		tidy_ce_ ## name = zend_register_internal_class_ex(&tidy_class_tmpl, parent); \
		tidy_ce_ ## name->ce_flags |= __flags; \
		memcpy(&tidy_object_handlers_ ## name, &std_object_handlers, sizeof(zend_object_handlers)); \
		tidy_object_handlers_ ## name.clone_obj = NULL; \
	}
112 
/* Register TIDY_TAG_<tag> as a persistent, case-sensitive long constant
 * whose value is TidyTag_<tag>. */
#define TIDY_TAG_CONST(tag) \
	REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)

/* Register TIDY_NODETYPE_<name> as a persistent, case-sensitive long constant
 * whose value is TidyNode_<type>. */
#define TIDY_NODE_CONST(name, type) \
	REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)
115 
/* Fallback boolean literals for environments that do not define them. */
#if !defined(TRUE)
# define TRUE 1
#endif

#if !defined(FALSE)
# define FALSE 0
#endif
123 
/* The ADD_PROPERTY_* helpers build a temporary zval and store it in the hash
 * table `_table` under the stringified key `_key`, replacing any existing
 * entry. Each expands to a brace block, not a do/while statement. */

/* String property; a NULL `_string` is stored as the empty string. */
#define ADD_PROPERTY_STRING(_table, _key, _string) \
	{ \
		zval tmp; \
		if (_string) { \
			ZVAL_STRING(&tmp, (char *)_string); \
		} else { \
			ZVAL_EMPTY_STRING(&tmp); \
		} \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/* Length-delimited string property; a NULL `_string` is stored as "". */
#define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
	{ \
		zval tmp; \
		if (_string) { \
			ZVAL_STRINGL(&tmp, (char *)_string, _len); \
		} else { \
			ZVAL_EMPTY_STRING(&tmp); \
		} \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/* Integer property. */
#define ADD_PROPERTY_LONG(_table, _key, _long) \
	{ \
		zval tmp; \
		ZVAL_LONG(&tmp, _long); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/* NULL property. */
#define ADD_PROPERTY_NULL(_table, _key) \
	{ \
		zval tmp; \
		ZVAL_NULL(&tmp); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/* Boolean property. */
#define ADD_PROPERTY_BOOL(_table, _key, _bool) \
	{ \
		zval tmp; \
		ZVAL_BOOL(&tmp, _bool); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}
166 
/* Makes the expanding function RETURN_FALSE when php_check_open_basedir()
 * reports a non-zero result for `filename`. */
#define TIDY_OPEN_BASE_DIR_CHECK(filename) \
	if (php_check_open_basedir(filename)) { \
		RETURN_FALSE; \
	}
171 
/* Load the configuration file named by the `default_config` module global
 * into `_doc` when that setting is a non-empty string; a negative result
 * from tidyLoadConfig() raises a warning. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) != NULL && TG(default_config)[0] != '\0') { \
		if (tidyLoadConfig(_doc, TG(default_config)) < 0) { \
			php_error_docref(NULL, E_WARNING, "Unable to load Tidy configuration file at '%s'.", TG(default_config)); \
		} \
	}
178 /* }}} */
179 
180 /* {{{ ext/tidy structs
181 */
182 typedef struct _PHPTidyDoc PHPTidyDoc;
183 typedef struct _PHPTidyObj PHPTidyObj;
184 
185 typedef enum {
186 	is_node,
187 	is_doc
188 } tidy_obj_type;
189 
190 typedef enum {
191 	is_root_node,
192 	is_html_node,
193 	is_head_node,
194 	is_body_node
195 } tidy_base_nodetypes;
196 
197 struct _PHPTidyDoc {
198 	TidyDoc			doc;
199 	TidyBuffer		*errbuf;
200 	unsigned int	ref_count;
201 	unsigned int    initialized:1;
202 };
203 
204 struct _PHPTidyObj {
205 	TidyNode		node;
206 	tidy_obj_type	type;
207 	PHPTidyDoc		*ptdoc;
208 	zend_object		std;
209 };
210 
php_tidy_fetch_object(zend_object * obj)211 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
212 	return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
213 }
214 
215 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
216 /* }}} */
217 
218 /* {{{ ext/tidy prototypes
219 */
220 static zend_string *php_tidy_file_to_mem(char *, zend_bool);
221 static void tidy_object_free_storage(zend_object *);
222 static zend_object *tidy_object_new_node(zend_class_entry *);
223 static zend_object *tidy_object_new_doc(zend_class_entry *);
224 static zval * tidy_instanciate(zend_class_entry *, zval *);
225 static int tidy_doc_cast_handler(zval *, zval *, int);
226 static int tidy_node_cast_handler(zval *, zval *, int);
227 static void tidy_doc_update_properties(PHPTidyObj *);
228 static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type);
229 static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
230 static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
231 static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
232 static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
233 static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
234 static void _php_tidy_register_tags(INIT_FUNC_ARGS);
235 static PHP_INI_MH(php_tidy_set_clean_output);
236 static void php_tidy_clean_output_start(const char *name, size_t name_len);
237 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
238 static int php_tidy_output_handler(void **nothing, php_output_context *output_context);
239 
240 static PHP_MINIT_FUNCTION(tidy);
241 static PHP_MSHUTDOWN_FUNCTION(tidy);
242 static PHP_RINIT_FUNCTION(tidy);
243 static PHP_RSHUTDOWN_FUNCTION(tidy);
244 static PHP_MINFO_FUNCTION(tidy);
245 
246 static PHP_FUNCTION(tidy_getopt);
247 static PHP_FUNCTION(tidy_parse_string);
248 static PHP_FUNCTION(tidy_parse_file);
249 static PHP_FUNCTION(tidy_clean_repair);
250 static PHP_FUNCTION(tidy_repair_string);
251 static PHP_FUNCTION(tidy_repair_file);
252 static PHP_FUNCTION(tidy_diagnose);
253 static PHP_FUNCTION(tidy_get_output);
254 static PHP_FUNCTION(tidy_get_error_buffer);
255 static PHP_FUNCTION(tidy_get_release);
256 static PHP_FUNCTION(tidy_get_config);
257 static PHP_FUNCTION(tidy_get_status);
258 static PHP_FUNCTION(tidy_get_html_ver);
259 #if HAVE_TIDYOPTGETDOC
260 static PHP_FUNCTION(tidy_get_opt_doc);
261 #endif
262 static PHP_FUNCTION(tidy_is_xhtml);
263 static PHP_FUNCTION(tidy_is_xml);
264 static PHP_FUNCTION(tidy_error_count);
265 static PHP_FUNCTION(tidy_warning_count);
266 static PHP_FUNCTION(tidy_access_count);
267 static PHP_FUNCTION(tidy_config_count);
268 
269 static PHP_FUNCTION(tidy_get_root);
270 static PHP_FUNCTION(tidy_get_html);
271 static PHP_FUNCTION(tidy_get_head);
272 static PHP_FUNCTION(tidy_get_body);
273 
274 static TIDY_DOC_METHOD(__construct);
275 static TIDY_DOC_METHOD(parseFile);
276 static TIDY_DOC_METHOD(parseString);
277 
278 static TIDY_NODE_METHOD(hasChildren);
279 static TIDY_NODE_METHOD(hasSiblings);
280 static TIDY_NODE_METHOD(isComment);
281 static TIDY_NODE_METHOD(isHtml);
282 static TIDY_NODE_METHOD(isText);
283 static TIDY_NODE_METHOD(isJste);
284 static TIDY_NODE_METHOD(isAsp);
285 static TIDY_NODE_METHOD(isPhp);
286 static TIDY_NODE_METHOD(getParent);
287 static TIDY_NODE_METHOD(__construct);
288 /* }}} */
289 
290 ZEND_DECLARE_MODULE_GLOBALS(tidy)
291 
292 PHP_INI_BEGIN()
293 STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
294 STD_PHP_INI_ENTRY("tidy.clean_output",		"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
295 PHP_INI_END()
296 
297 /* {{{ arginfo */
298 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_string, 0, 0, 1)
299 	ZEND_ARG_INFO(0, input)
300 	ZEND_ARG_INFO(0, config_options)
301 	ZEND_ARG_INFO(0, encoding)
302 ZEND_END_ARG_INFO()
303 
304 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_error_buffer, 0, 0, 1)
305     ZEND_ARG_INFO(0, object)
306 ZEND_END_ARG_INFO()
307 
308 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_output, 0, 0, 1)
309     ZEND_ARG_INFO(0, object)
310 ZEND_END_ARG_INFO()
311 
312 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_file, 0, 0, 1)
313 	ZEND_ARG_INFO(0, file)
314 	ZEND_ARG_INFO(0, config_options)
315 	ZEND_ARG_INFO(0, encoding)
316 	ZEND_ARG_INFO(0, use_include_path)
317 ZEND_END_ARG_INFO()
318 
319 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_clean_repair, 0, 0, 1)
320     ZEND_ARG_INFO(0, object)
321 ZEND_END_ARG_INFO()
322 
323 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_string, 0, 0, 1)
324 	ZEND_ARG_INFO(0, data)
325 	ZEND_ARG_INFO(0, config_file)
326 	ZEND_ARG_INFO(0, encoding)
327 ZEND_END_ARG_INFO()
328 
329 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_file, 0, 0, 1)
330 	ZEND_ARG_INFO(0, filename)
331 	ZEND_ARG_INFO(0, config_file)
332 	ZEND_ARG_INFO(0, encoding)
333 	ZEND_ARG_INFO(0, use_include_path)
334 ZEND_END_ARG_INFO()
335 
336 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_diagnose, 0, 0, 1)
337     ZEND_ARG_INFO(0, object)
338 ZEND_END_ARG_INFO()
339 
340 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_release, 0)
341 ZEND_END_ARG_INFO()
342 
343 #if HAVE_TIDYOPTGETDOC
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc, 0, 0, 2)
345 	ZEND_ARG_INFO(0, resource)
346 	ZEND_ARG_INFO(0, optname)
347 ZEND_END_ARG_INFO()
348 
349 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc_method, 0, 0, 1)
350 	ZEND_ARG_INFO(0, optname)
351 ZEND_END_ARG_INFO()
352 #endif
353 
354 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_config, 0, 0, 1)
355     ZEND_ARG_INFO(0, object)
356 ZEND_END_ARG_INFO()
357 
358 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_status, 0, 0, 1)
359     ZEND_ARG_INFO(0, object)
360 ZEND_END_ARG_INFO()
361 
362 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_html_ver, 0, 0, 1)
363     ZEND_ARG_INFO(0, object)
364 ZEND_END_ARG_INFO()
365 
366 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xhtml, 0, 0, 1)
367     ZEND_ARG_INFO(0, object)
368 ZEND_END_ARG_INFO()
369 
370 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xml, 0, 0, 1)
371     ZEND_ARG_INFO(0, object)
372 ZEND_END_ARG_INFO()
373 
374 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_error_count, 0, 0, 1)
375     ZEND_ARG_INFO(0, object)
376 ZEND_END_ARG_INFO()
377 
378 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_warning_count, 0, 0, 1)
379     ZEND_ARG_INFO(0, object)
380 ZEND_END_ARG_INFO()
381 
382 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_access_count, 0, 0, 1)
383     ZEND_ARG_INFO(0, object)
384 ZEND_END_ARG_INFO()
385 
386 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_config_count, 0, 0, 1)
387     ZEND_ARG_INFO(0, object)
388 ZEND_END_ARG_INFO()
389 
390 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt_method, 0, 0, 1)
391 	ZEND_ARG_INFO(0, option)
392 ZEND_END_ARG_INFO()
393 
394 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt, 0, 0, 2)
395 	ZEND_ARG_INFO(0, object)
396 	ZEND_ARG_INFO(0, option)
397 ZEND_END_ARG_INFO()
398 
399 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_root, 0, 0, 1)
400 	ZEND_ARG_INFO(0, object)
401 ZEND_END_ARG_INFO()
402 
403 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_html, 0, 0, 1)
404 	ZEND_ARG_INFO(0, object)
405 ZEND_END_ARG_INFO()
406 
407 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_head, 0, 0, 1)
408 	ZEND_ARG_INFO(0, object)
409 ZEND_END_ARG_INFO()
410 
411 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_body, 0, 0, 1)
412 	ZEND_ARG_INFO(0, tidy)
413 ZEND_END_ARG_INFO()
414 
415 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_construct, 0, 0, 0)
416     ZEND_ARG_INFO(0, filename)
417     ZEND_ARG_INFO(0, config_file)
418     ZEND_ARG_INFO(0, encoding)
419     ZEND_ARG_INFO(0, use_include_path)
420 ZEND_END_ARG_INFO()
421 /* }}} */
422 
423 static const zend_function_entry tidy_functions[] = {
424 	PHP_FE(tidy_getopt,             arginfo_tidy_getopt)
425 	PHP_FE(tidy_parse_string,       arginfo_tidy_parse_string)
426 	PHP_FE(tidy_parse_file,         arginfo_tidy_parse_file)
427 	PHP_FE(tidy_get_output,         arginfo_tidy_get_output)
428 	PHP_FE(tidy_get_error_buffer,   arginfo_tidy_get_error_buffer)
429 	PHP_FE(tidy_clean_repair,       arginfo_tidy_clean_repair)
430 	PHP_FE(tidy_repair_string,	arginfo_tidy_repair_string)
431 	PHP_FE(tidy_repair_file,	arginfo_tidy_repair_file)
432 	PHP_FE(tidy_diagnose,           arginfo_tidy_diagnose)
433 	PHP_FE(tidy_get_release,	arginfo_tidy_get_release)
434 	PHP_FE(tidy_get_config,		arginfo_tidy_get_config)
435 	PHP_FE(tidy_get_status,		arginfo_tidy_get_status)
436 	PHP_FE(tidy_get_html_ver,	arginfo_tidy_get_html_ver)
437 	PHP_FE(tidy_is_xhtml,		arginfo_tidy_is_xhtml)
438 	PHP_FE(tidy_is_xml,		arginfo_tidy_is_xml)
439 	PHP_FE(tidy_error_count,	arginfo_tidy_error_count)
440 	PHP_FE(tidy_warning_count,	arginfo_tidy_warning_count)
441 	PHP_FE(tidy_access_count,	arginfo_tidy_access_count)
442 	PHP_FE(tidy_config_count,	arginfo_tidy_config_count)
443 #if HAVE_TIDYOPTGETDOC
444 	PHP_FE(tidy_get_opt_doc,	arginfo_tidy_get_opt_doc)
445 #endif
446 	PHP_FE(tidy_get_root,		arginfo_tidy_get_root)
447 	PHP_FE(tidy_get_head,		arginfo_tidy_get_head)
448 	PHP_FE(tidy_get_html,		arginfo_tidy_get_html)
449 	PHP_FE(tidy_get_body,		arginfo_tidy_get_body)
450 	PHP_FE_END
451 };
452 
453 static const zend_function_entry tidy_funcs_doc[] = {
454 	TIDY_METHOD_MAP(getOpt, tidy_getopt, arginfo_tidy_getopt_method)
455 	TIDY_METHOD_MAP(cleanRepair, tidy_clean_repair, NULL)
456 	TIDY_DOC_ME(parseFile, arginfo_tidy_parse_file)
457 	TIDY_DOC_ME(parseString, arginfo_tidy_parse_string)
458 	TIDY_METHOD_MAP(repairString, tidy_repair_string, arginfo_tidy_repair_string)
459 	TIDY_METHOD_MAP(repairFile, tidy_repair_file, arginfo_tidy_repair_file)
460 	TIDY_METHOD_MAP(diagnose, tidy_diagnose, NULL)
461 	TIDY_METHOD_MAP(getRelease, tidy_get_release, NULL)
462 	TIDY_METHOD_MAP(getConfig, tidy_get_config, NULL)
463 	TIDY_METHOD_MAP(getStatus, tidy_get_status, NULL)
464 	TIDY_METHOD_MAP(getHtmlVer, tidy_get_html_ver, NULL)
465 #if HAVE_TIDYOPTGETDOC
466 	TIDY_METHOD_MAP(getOptDoc, tidy_get_opt_doc, arginfo_tidy_get_opt_doc_method)
467 #endif
468 	TIDY_METHOD_MAP(isXhtml, tidy_is_xhtml, NULL)
469 	TIDY_METHOD_MAP(isXml, tidy_is_xml, NULL)
470 	TIDY_METHOD_MAP(root, tidy_get_root, NULL)
471 	TIDY_METHOD_MAP(head, tidy_get_head, NULL)
472 	TIDY_METHOD_MAP(html, tidy_get_html, NULL)
473 	TIDY_METHOD_MAP(body, tidy_get_body, NULL)
474 	TIDY_DOC_ME(__construct, arginfo_tidy_construct)
475 	PHP_FE_END
476 };
477 
478 static const zend_function_entry tidy_funcs_node[] = {
479 	TIDY_NODE_ME(hasChildren, NULL)
480 	TIDY_NODE_ME(hasSiblings, NULL)
481 	TIDY_NODE_ME(isComment, NULL)
482 	TIDY_NODE_ME(isHtml, NULL)
483 	TIDY_NODE_ME(isText, NULL)
484 	TIDY_NODE_ME(isJste, NULL)
485 	TIDY_NODE_ME(isAsp, NULL)
486 	TIDY_NODE_ME(isPhp, NULL)
487 	TIDY_NODE_ME(getParent, NULL)
488 	TIDY_NODE_PRIVATE_ME(__construct, NULL)
489 	PHP_FE_END
490 };
491 
492 static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
493 
494 static zend_object_handlers tidy_object_handlers_doc;
495 static zend_object_handlers tidy_object_handlers_node;
496 
497 zend_module_entry tidy_module_entry = {
498 	STANDARD_MODULE_HEADER,
499 	"tidy",
500 	tidy_functions,
501 	PHP_MINIT(tidy),
502 	PHP_MSHUTDOWN(tidy),
503 	PHP_RINIT(tidy),
504 	PHP_RSHUTDOWN(tidy),
505 	PHP_MINFO(tidy),
506 	PHP_TIDY_VERSION,
507 	PHP_MODULE_GLOBALS(tidy),
508 	NULL,
509 	NULL,
510 	NULL,
511 	STANDARD_MODULE_PROPERTIES_EX
512 };
513 
/* Only emitted when COMPILE_DL_TIDY is defined. */
#if defined(COMPILE_DL_TIDY)
# if defined(ZTS)
ZEND_TSRMLS_CACHE_DEFINE()
# endif
ZEND_GET_MODULE(tidy)
#endif
520 
521 static void* TIDY_CALL php_tidy_malloc(size_t len)
522 {
523 	return emalloc(len);
524 }
525 
php_tidy_realloc(void * buf,size_t len)526 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
527 {
528 	return erealloc(buf, len);
529 }
530 
php_tidy_free(void * buf)531 static void TIDY_CALL php_tidy_free(void *buf)
532 {
533 	efree(buf);
534 }
535 
php_tidy_panic(ctmbstr msg)536 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
537 {
538 	php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
539 }
540 
/* Set one tidy option on `doc` from a PHP value.
 *
 * doc     - document whose configuration is modified
 * optname - option name, looked up with tidyGetOptionByName()
 * value   - PHP value; converted to string or integer according to the
 *           option's declared type
 *
 * Returns SUCCESS when libtidy accepts the value. Returns FAILURE for an
 * unknown option (notice), a read-only option (notice), an option of
 * unrecognised type (warning) or a value libtidy rejects. */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	int result = FAILURE;
	TidyOption option = tidyGetOptionByName(doc, optname);

	if (!option) {
		php_error_docref(NULL, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_NOTICE, "Attempting to set read-only option '%s'", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *holder;
			zend_string *text = zval_get_tmp_string(value, &holder);

			if (tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text))) {
				result = SUCCESS;
			}
			/* the temporary string is released on both outcomes */
			zend_tmp_string_release(holder);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				result = SUCCESS;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				result = SUCCESS;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return result;
}
588 
/* Shared implementation of the "repair in one call" entry points.
 *
 * Arguments parsed from the PHP call: the input (a path when `is_file` is
 * set, otherwise the markup itself), an optional configuration (array of
 * options, or a value converted to a configuration file path), an optional
 * encoding name and an optional use_include_path flag.
 *
 * The return value is set to the repaired markup, or FALSE when argument
 * parsing fails, the file cannot be read, the input length does not fit
 * the 32-bit length handed to libtidy, the configuration path is rejected
 * by open_basedir, or parsing/repairing fails.
 *
 * Every exit after the input has been obtained goes through the cleanup
 * labels so that the tidy document, the error buffer and (for files) the
 * loaded data are always released. */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1;
	zval *config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		data = arg1;
	}

	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		RETVAL_FALSE;
		/* nothing tidy-related exists yet, but file data must be released */
		goto release_data;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		if (is_file) {
			zend_string_release_ex(data, 0);
		}
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
		/* NOTE(review): E_ERROR is presumably fatal and does not return;
		 * return defensively so freed state is never touched. */
		RETURN_FALSE;
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/* Same steps and messages as TIDY_APPLY_CONFIG_ZVAL, written out so the
	 * open_basedir rejection can run the cleanup instead of returning. */
	if (config) {
		if (Z_TYPE_P(config) == IS_ARRAY) {
			_php_tidy_apply_config_array(doc, Z_ARRVAL_P(config));
		} else {
			convert_to_string_ex(config);
			if (php_check_open_basedir(Z_STRVAL_P(config))) {
				RETVAL_FALSE;
				goto cleanup;
			}
			switch (tidyLoadConfig(doc, Z_STRVAL_P(config))) {
				case -1:
					php_error_docref(NULL, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_P(config));
					break;
				case 1:
					php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_P(config));
					break;
			}
		}
	}

	if (enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
			/* processing continues; the result below overrides this value */
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				/* the final byte was turned into a terminator, so drop it */
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

cleanup:
	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);

release_data:
	/* only data loaded from a file is owned here */
	if (is_file) {
		zend_string_release_ex(data, 0);
	}
}
677 
php_tidy_file_to_mem(char * filename,zend_bool use_include_path)678 static zend_string *php_tidy_file_to_mem(char *filename, zend_bool use_include_path)
679 {
680 	php_stream *stream;
681 	zend_string *data = NULL;
682 
683 	if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
684 		return NULL;
685 	}
686 	if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
687 		data = ZSTR_EMPTY_ALLOC();
688 	}
689 	php_stream_close(stream);
690 
691 	return data;
692 }
693 
/* Object storage destructor: tears down the standard object part, then drops
 * this object's reference on the shared document state and frees that state
 * (error buffer, tidy document, container) when the count reaches zero. */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&intern->std);

	shared = intern->ptdoc;
	if (!shared) {
		return;
	}

	shared->ref_count--;
	if (shared->ref_count > 0) {
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
711 
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)712 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
713 {
714 	PHPTidyObj *intern;
715 
716 	intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
717 	zend_object_std_init(&intern->std, class_type);
718 	object_properties_init(&intern->std, class_type);
719 
720 	switch(objtype) {
721 		case is_node:
722 			break;
723 
724 		case is_doc:
725 			intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
726 			intern->ptdoc->doc = tidyCreate();
727 			intern->ptdoc->ref_count = 1;
728 			intern->ptdoc->initialized = 0;
729 			intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
730 			tidyBufInit(intern->ptdoc->errbuf);
731 
732 			if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
733 				tidyBufFree(intern->ptdoc->errbuf);
734 				efree(intern->ptdoc->errbuf);
735 				tidyRelease(intern->ptdoc->doc);
736 				efree(intern->ptdoc);
737 				efree(intern);
738 				php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
739 			}
740 
741 			tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
742 			tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
743 
744 			TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
745 
746 			tidy_add_default_properties(intern, is_doc);
747 			break;
748 	}
749 
750 	intern->std.handlers = handlers;
751 
752 	return &intern->std;
753 }
754 
tidy_object_new_node(zend_class_entry * class_type)755 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
756 {
757 	return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
758 }
759 
tidy_object_new_doc(zend_class_entry * class_type)760 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
761 {
762 	return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
763 }
764 
/* Initialise `object` as an instance of `pce` and hand the same zval back.
 * The result of object_init_ex() is not inspected. */
static zval * tidy_instanciate(zend_class_entry *pce, zval *object)
{
	zval *instance = object;

	object_init_ex(instance, pce);

	return instance;
}
770 
/* Cast handler for document objects.
 *
 * Integer/number casts give 0, double gives 0, boolean gives true. A string
 * cast serialises the document with tidySaveBuffer(), dropping the final
 * byte of the buffer; an empty buffer gives "". Any other target type
 * returns FAILURE. */
static int tidy_doc_cast_handler(zval *in, zval *out, int type)
{
	if (type == IS_LONG || type == _IS_NUMBER) {
		ZVAL_LONG(out, 0);
		return SUCCESS;
	}

	if (type == IS_DOUBLE) {
		ZVAL_DOUBLE(out, 0);
		return SUCCESS;
	}

	if (type == _IS_BOOL) {
		ZVAL_TRUE(out);
		return SUCCESS;
	}

	if (type == IS_STRING) {
		TidyBuffer rendered;
		PHPTidyObj *doc_obj = Z_TIDY_P(in);

		tidyBufInit(&rendered);
		tidySaveBuffer (doc_obj->ptdoc->doc, &rendered);
		if (!rendered.size) {
			ZVAL_EMPTY_STRING(out);
		} else {
			ZVAL_STRINGL(out, (char *) rendered.bp, rendered.size-1);
		}
		tidyBufFree(&rendered);
		return SUCCESS;
	}

	return FAILURE;
}
808 
/* Cast handler for node objects.
 *
 * Integer/number casts give 0, double gives 0, boolean gives true. A string
 * cast returns the node's text as produced by tidyNodeGetText(), without the
 * buffer's final byte. A node with no document, or a node whose text buffer
 * comes back empty, gives "". Any other target type returns FAILURE. */
static int tidy_node_cast_handler(zval *in, zval *out, int type)
{
	TidyBuffer buf;
	PHPTidyObj *obj;

	switch(type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			break;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			break;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			break;

		case IS_STRING:
			obj = Z_TIDY_P(in);
			tidyBufInit(&buf);
			if (obj->ptdoc) {
				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			}
			/* Guard the empty buffer: `size - 1` would wrap around on an
			 * unsigned size of 0 and bp may be NULL. */
			if (buf.size) {
				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&buf);
			break;

		default:
			return FAILURE;
	}

	return SUCCESS;
}
846 
/* Store `len` bytes of `bytes` as a string property named `key` on `obj`,
 * building the property table first when the object has none yet. */
static void tidy_doc_store_string_property(PHPTidyObj *obj, const char *key, size_t key_len, const char *bytes, size_t len)
{
	zval str;

	if (!obj->std.properties) {
		rebuild_object_properties(&obj->std);
	}
	ZVAL_STRINGL(&str, bytes, len);
	zend_hash_str_update(obj->std.properties, key, key_len, &str);
}

/* Refresh the "value" and "errorBuffer" properties of a document object.
 * "value" receives the serialised document and "errorBuffer" the error
 * buffer contents, each without the buffer's final byte. A property is only
 * touched when its source buffer is non-empty. */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer rendered;
	TidyBuffer *errors;

	tidyBufInit(&rendered);
	tidySaveBuffer (obj->ptdoc->doc, &rendered);

	if (rendered.size) {
		tidy_doc_store_string_property(obj, "value", sizeof("value") - 1, (char*)rendered.bp, rendered.size-1);
	}

	tidyBufFree(&rendered);

	errors = obj->ptdoc->errbuf;
	if (errors->size) {
		tidy_doc_store_string_property(obj, "errorBuffer", sizeof("errorBuffer") - 1, (char*)errors->bp, errors->size-1);
	}
}
874 
/* Populate the property table of a tidy object.
 *
 * is_doc:  "errorBuffer" and "value" are set to NULL.
 * is_node: "value", "name", "type", "line", "column" and "proprietary" are
 *          filled from the node; "id" is added except for root, doctype,
 *          text and comment nodes; "attribute" is an array of name => value
 *          pairs (NULL when the node has no attributes); "child" is an array
 *          of node objects built recursively (NULL when there are no
 *          children). Each child shares the parent's document state and
 *          takes one reference on it. */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{
	TidyBuffer text;
	TidyAttr attr;
	TidyNode child;
	zval attributes, kids, child_zv;
	PHPTidyObj *child_obj;

	if (type == is_doc) {
		if (!obj->std.properties) {
			rebuild_object_properties(&obj->std);
		}
		ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
		ADD_PROPERTY_NULL(obj->std.properties, value);
		return;
	}

	if (type != is_node) {
		return;
	}

	if (!obj->std.properties) {
		rebuild_object_properties(&obj->std);
	}

	/* scalar properties */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
	ADD_PROPERTY_STRINGL(obj->std.properties, value, text.bp, text.size ? text.size-1 : 0);
	tidyBufFree(&text);

	ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
	ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			/* these node kinds get no "id" property */
			break;

		default:
			ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
	}

	/* attributes */
	attr = tidyAttrFirst(obj->node);
	if (!attr) {
		ZVAL_NULL(&attributes);
	} else {
		array_init(&attributes);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *)tidyAttrName(attr);
			char *attr_val = (char *)tidyAttrValue(attr);

			/* attributes lacking a name or a value are skipped */
			if (attr_name && attr_val) {
				add_assoc_string(&attributes, attr_name, attr_val);
			}
		}
	}
	zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attributes);

	/* children */
	child = tidyGetChild(obj->node);
	if (!child) {
		ZVAL_NULL(&kids);
	} else {
		array_init(&kids);

		for (; child; child = tidyGetNext(child)) {
			tidy_instanciate(tidy_ce_node, &child_zv);
			child_obj = Z_TIDY_P(&child_zv);
			child_obj->node = child;
			child_obj->type = is_node;
			child_obj->ptdoc = obj->ptdoc;
			child_obj->ptdoc->ref_count++;

			tidy_add_default_properties(child_obj, is_node);
			add_next_index_zval(&kids, &child_zv);
		}
	}
	zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &kids);
}
964 
/* Fetch the current value of option `opt` from the document in `ptdoc`.
 *
 * type - out parameter, receives the option's TidyOptionType
 *
 * The meaning of the returned pointer depends on *type:
 *   TidyString  - a newly initialised zend_string (the empty string when
 *                 libtidy reports no value)
 *   TidyInteger - the integer value stored in the pointer itself
 *   TidyBoolean - the boolean value stored in the pointer itself
 * Any other type yields NULL. */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	*type = tidyOptGetType(opt);

	switch (*type) {
		case TidyString: {
			char *val = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));
			if (val) {
				return (void *) zend_string_init(val, strlen(val), 0);
			} else {
				return (void *) ZSTR_EMPTY_ALLOC();
			}
		}

		case TidyInteger:
			return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));

		case TidyBoolean:
			/* widen through uintptr_t, as the integer branch does, so the
			 * value is never cast directly from a narrower type to a pointer */
			return (void *) (uintptr_t) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));

		default:
			break;
	}

	/* should not happen */
	return NULL;
}
992 
/* Return one of the document's well-known nodes as a node object.
 *
 * node_type selects root, html, head or body. The return value is NULL when
 * the selector is unknown or the document has no such node; otherwise it is
 * a new node object that shares the document state of the tidy object and
 * holds one additional reference on it. */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *node_obj;
	TidyNode found;
	TIDY_FETCH_OBJECT;

	if (node_type == is_root_node) {
		found = tidyGetRoot(obj->ptdoc->doc);
	} else if (node_type == is_html_node) {
		found = tidyGetHtml(obj->ptdoc->doc);
	} else if (node_type == is_head_node) {
		found = tidyGetHead(obj->ptdoc->doc);
	} else if (node_type == is_body_node) {
		found = tidyGetBody(obj->ptdoc->doc);
	} else {
		RETURN_NULL();
	}

	if (!found) {
		RETURN_NULL();
	}

	tidy_instanciate(tidy_ce_node, return_value);
	node_obj = Z_TIDY_P(return_value);
	node_obj->type  = is_node;
	node_obj->ptdoc = obj->ptdoc;
	node_obj->node  = found;
	node_obj->ptdoc->ref_count++;

	tidy_add_default_properties(node_obj, is_node);
}
1034 
/*
 * Apply every string-keyed entry of ht_options to the tidy document via
 * _php_tidy_set_tidy_opt(). Always returns SUCCESS; the result of the
 * individual option assignments is not inspected here.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zend_string *key;
	zval *value;

	ZEND_HASH_FOREACH_STR_KEY_VAL(ht_options, key, value) {
		/* Entries without a string key cannot name an option; ignore them. */
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), value);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
1049 
/*
 * Parse len bytes of markup from string into obj's tidy document.
 *
 * When enc is non-NULL it is first set as the document's character
 * encoding; failure to do so raises a warning and returns FAILURE.
 * The document is flagged as initialized before parsing starts. On a parse
 * error the contents of the document's error buffer are raised as a
 * warning and FAILURE is returned; otherwise the object's properties are
 * refreshed and SUCCESS is returned.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	TidyBuffer input;

	if (enc != NULL && tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
		return FAILURE;
	}

	obj->ptdoc->initialized = 1;

	/* The buffer only borrows the caller's memory; nothing is copied. */
	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(obj->ptdoc->doc, &input) >= 0) {
		tidy_doc_update_properties(obj);
		return SUCCESS;
	}

	php_error_docref(NULL, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
	return FAILURE;
}
1073 
PHP_MINIT_FUNCTION(tidy)1074 static PHP_MINIT_FUNCTION(tidy)
1075 {
1076 	tidySetMallocCall(php_tidy_malloc);
1077 	tidySetReallocCall(php_tidy_realloc);
1078 	tidySetFreeCall(php_tidy_free);
1079 	tidySetPanicCall(php_tidy_panic);
1080 
1081 	REGISTER_INI_ENTRIES();
1082 	REGISTER_TIDY_CLASS(tidy, doc,	NULL, 0);
1083 	REGISTER_TIDY_CLASS(tidyNode, node,	NULL, ZEND_ACC_FINAL);
1084 
1085 	tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
1086 	tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
1087 
1088 	tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
1089 	tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
1090 
1091 	_php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU);
1092 	_php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU);
1093 
1094 	php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
1095 
1096 	return SUCCESS;
1097 }
1098 
PHP_RINIT_FUNCTION(tidy)1099 static PHP_RINIT_FUNCTION(tidy)
1100 {
1101 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
1102 	ZEND_TSRMLS_CACHE_UPDATE();
1103 #endif
1104 
1105 	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
1106 
1107 	return SUCCESS;
1108 }
1109 
PHP_RSHUTDOWN_FUNCTION(tidy)1110 static PHP_RSHUTDOWN_FUNCTION(tidy)
1111 {
1112 	TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
1113 
1114 	return SUCCESS;
1115 }
1116 
PHP_MSHUTDOWN_FUNCTION(tidy)1117 static PHP_MSHUTDOWN_FUNCTION(tidy)
1118 {
1119 	UNREGISTER_INI_ENTRIES();
1120 	return SUCCESS;
1121 }
1122 
/*
 * phpinfo() section: a table with the support flag and, depending on which
 * library headers/features were detected at build time, the library
 * version and release date, followed by the extension's ini entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");

	/* Which version getter exists depends on the library flavour built against. */
#if HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
#elif HAVE_TIDYP_H
	php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
#endif

#if HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
1139 
PHP_INI_MH(php_tidy_set_clean_output)1140 static PHP_INI_MH(php_tidy_set_clean_output)
1141 {
1142 	int status;
1143 	zend_bool value;
1144 
1145 	if (ZSTR_LEN(new_value)==2 && strcasecmp("on", ZSTR_VAL(new_value))==0) {
1146 		value = (zend_bool) 1;
1147 	} else if (ZSTR_LEN(new_value)==3 && strcasecmp("yes", ZSTR_VAL(new_value))==0) {
1148 		value = (zend_bool) 1;
1149 	} else if (ZSTR_LEN(new_value)==4 && strcasecmp("true", ZSTR_VAL(new_value))==0) {
1150 		value = (zend_bool) 1;
1151 	} else {
1152 		value = (zend_bool) atoi(ZSTR_VAL(new_value));
1153 	}
1154 
1155 	if (stage == PHP_INI_STAGE_RUNTIME) {
1156 		status = php_output_get_status();
1157 
1158 		if (value && (status & PHP_OUTPUT_WRITTEN)) {
1159 			php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
1160 			return FAILURE;
1161 		}
1162 		if (status & PHP_OUTPUT_SENT) {
1163 			php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
1164 			return FAILURE;
1165 		}
1166 	}
1167 
1168 	status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1169 
1170 	if (stage == PHP_INI_STAGE_RUNTIME && value) {
1171 		if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
1172 			php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
1173 		}
1174 	}
1175 
1176 	return status;
1177 }
1178 
1179 /*
1180  * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
1181  */
1182 
php_tidy_clean_output_start(const char * name,size_t name_len)1183 static void php_tidy_clean_output_start(const char *name, size_t name_len)
1184 {
1185 	php_output_handler *h;
1186 
1187 	if (TG(clean_output) && (h = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS))) {
1188 		php_output_handler_start(h);
1189 	}
1190 }
1191 
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)1192 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
1193 {
1194 	if (chunk_size) {
1195 		php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
1196 		return NULL;
1197 	}
1198 	if (!TG(clean_output)) {
1199 		TG(clean_output) = 1;
1200 	}
1201 	return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
1202 }
1203 
/*
 * Output handler callback: run the complete buffered output through
 * libtidy (parse, clean and repair) and hand the result back to the
 * output layer.
 *
 * Work is only done when clean_output is enabled and the invocation is
 * both the first and the final one for the buffer (tidy cannot parse
 * incrementally). Returns SUCCESS when the repaired markup was stored in
 * output_context->out, FAILURE otherwise.
 *
 * The temporary tidy document and its error buffer are released on every
 * path, including the "Input string is too long" rejection, which
 * previously returned early and leaked both.
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		doc = tidyCreate();
		tidyBufInit(&errbuf);

		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
			tidyOptSetBool(doc, TidyForceOutput, yes);
			tidyOptSetBool(doc, TidyMark, no);

			if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
				/* The length is passed to libtidy as a 32-bit value below. */
				php_error_docref(NULL, E_WARNING, "Input string is too long");
			} else {
				TIDY_SET_DEFAULT_CONFIG(doc);

				/* inbuf only borrows the output layer's memory. */
				tidyBufInit(&inbuf);
				tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);

				if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
					tidyBufInit(&outbuf);
					tidySaveBuffer(doc, &outbuf);
					FIX_BUFFER(&outbuf);
					/* Ownership of outbuf's memory moves to the output layer (out.free = 1). */
					output_context->out.data = (char *) outbuf.bp;
					output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
					output_context->out.free = 1;
					status = SUCCESS;
				}
			}
		}

		/* Common cleanup for success and every failure path. */
		tidyRelease(doc);
		tidyBufFree(&errbuf);
	}

	return status;
}
1245 
1246 /* {{{ proto bool tidy_parse_string(string input [, mixed config_options [, string encoding]])
1247    Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1248 static PHP_FUNCTION(tidy_parse_string)
1249 {
1250 	char *enc = NULL;
1251 	size_t enc_len = 0;
1252 	zend_string *input;
1253 	zval *options = NULL;
1254 	PHPTidyObj *obj;
1255 
1256 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zs", &input, &options, &enc, &enc_len) == FAILURE) {
1257 		RETURN_FALSE;
1258 	}
1259 
1260 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1261 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1262 		RETURN_FALSE;
1263 	}
1264 
1265 	tidy_instanciate(tidy_ce_doc, return_value);
1266 	obj = Z_TIDY_P(return_value);
1267 
1268 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1269 
1270 	if (php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1271 		zval_ptr_dtor(return_value);
1272 		RETURN_FALSE;
1273 	}
1274 }
1275 /* }}} */
1276 
1277 /* {{{ proto string tidy_get_error_buffer()
1278    Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1279 static PHP_FUNCTION(tidy_get_error_buffer)
1280 {
1281 	TIDY_FETCH_OBJECT;
1282 
1283 	if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1284 		RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1285 	} else {
1286 		RETURN_FALSE;
1287 	}
1288 }
1289 /* }}} */
1290 
1291 /* {{{ proto string tidy_get_output(tidy tidy)
1292    Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1293 static PHP_FUNCTION(tidy_get_output)
1294 {
1295 	TidyBuffer output;
1296 	TIDY_FETCH_OBJECT;
1297 
1298 	tidyBufInit(&output);
1299 	tidySaveBuffer(obj->ptdoc->doc, &output);
1300 	FIX_BUFFER(&output);
1301 	RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1302 	tidyBufFree(&output);
1303 }
1304 /* }}} */
1305 
1306 /* {{{ proto bool tidy_parse_file(string file [, mixed config_options [, string encoding [, bool use_include_path]]])
1307    Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1308 static PHP_FUNCTION(tidy_parse_file)
1309 {
1310 	char *enc = NULL;
1311 	size_t enc_len = 0;
1312 	zend_bool use_include_path = 0;
1313 	zend_string *inputfile, *contents;
1314 	zval *options = NULL;
1315 
1316 	PHPTidyObj *obj;
1317 
1318 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &inputfile,
1319 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1320 		RETURN_FALSE;
1321 	}
1322 
1323 	tidy_instanciate(tidy_ce_doc, return_value);
1324 	obj = Z_TIDY_P(return_value);
1325 
1326 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1327 		php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
1328 		RETURN_FALSE;
1329 	}
1330 
1331 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1332 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1333 		RETURN_FALSE;
1334 	}
1335 
1336 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1337 
1338 	if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1339 		zval_ptr_dtor(return_value);
1340 		RETVAL_FALSE;
1341 	}
1342 
1343 	zend_string_release_ex(contents, 0);
1344 }
1345 /* }}} */
1346 
1347 /* {{{ proto bool tidy_clean_repair(tidy tidy)
1348    Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1349 static PHP_FUNCTION(tidy_clean_repair)
1350 {
1351 	TIDY_FETCH_OBJECT;
1352 
1353 	if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1354 		tidy_doc_update_properties(obj);
1355 		RETURN_TRUE;
1356 	}
1357 
1358 	RETURN_FALSE;
1359 }
1360 /* }}} */
1361 
1362 /* {{{ proto bool tidy_repair_string(string data [, mixed config_file [, string encoding]])
1363    Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1364 static PHP_FUNCTION(tidy_repair_string)
1365 {
1366 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, FALSE);
1367 }
1368 /* }}} */
1369 
1370 /* {{{ proto bool tidy_repair_file(string filename [, mixed config_file [, string encoding [, bool use_include_path]]])
1371    Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1372 static PHP_FUNCTION(tidy_repair_file)
1373 {
1374 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, TRUE);
1375 }
1376 /* }}} */
1377 
1378 /* {{{ proto bool tidy_diagnose()
1379    Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1380 static PHP_FUNCTION(tidy_diagnose)
1381 {
1382 	TIDY_FETCH_OBJECT;
1383 
1384 	if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1385 		tidy_doc_update_properties(obj);
1386 		RETURN_TRUE;
1387 	}
1388 
1389 	RETURN_FALSE;
1390 }
1391 /* }}} */
1392 
1393 /* {{{ proto string tidy_get_release()
1394    Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1395 static PHP_FUNCTION(tidy_get_release)
1396 {
1397 	if (zend_parse_parameters_none() == FAILURE) {
1398 		return;
1399 	}
1400 
1401 #if HAVE_TIDYRELEASEDATE
1402 	RETURN_STRING((char *)tidyReleaseDate());
1403 #else
1404 	RETURN_STRING((char *)"unknown");
1405 #endif
1406 }
1407 /* }}} */
1408 
1409 
1410 #if HAVE_TIDYOPTGETDOC
1411 /* {{{ proto string tidy_get_opt_doc(tidy resource, string optname)
1412    Returns the documentation for the given option name */
PHP_FUNCTION(tidy_get_opt_doc)1413 static PHP_FUNCTION(tidy_get_opt_doc)
1414 {
1415 	PHPTidyObj *obj;
1416 	char *optval, *optname;
1417 	size_t optname_len;
1418 	TidyOption opt;
1419 
1420 	TIDY_SET_CONTEXT;
1421 
1422 	if (object) {
1423 		if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &optname, &optname_len) == FAILURE) {
1424 			RETURN_FALSE;
1425 		}
1426 	} else {
1427 		if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1428 			RETURN_FALSE;
1429 		}
1430 	}
1431 
1432 	obj = Z_TIDY_P(object);
1433 
1434 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1435 
1436 	if (!opt) {
1437 		php_error_docref(NULL, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
1438 		RETURN_FALSE;
1439 	}
1440 
1441 	if ( (optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt)) ) {
1442 		RETURN_STRING(optval);
1443 	}
1444 
1445 	RETURN_FALSE;
1446 }
1447 /* }}} */
1448 #endif
1449 
1450 
1451 /* {{{ proto array tidy_get_config(tidy tidy)
1452    Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1453 static PHP_FUNCTION(tidy_get_config)
1454 {
1455 	TidyIterator itOpt;
1456 	char *opt_name;
1457 	void *opt_value;
1458 	TidyOptionType optt;
1459 
1460 	TIDY_FETCH_OBJECT;
1461 
1462 	itOpt = tidyGetOptionList(obj->ptdoc->doc);
1463 
1464 	array_init(return_value);
1465 
1466 	while (itOpt) {
1467 		TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1468 
1469 		opt_name = (char *)tidyOptGetName(opt);
1470 		opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1471 		switch (optt) {
1472 			case TidyString:
1473 				add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1474 				break;
1475 
1476 			case TidyInteger:
1477 				add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1478 				break;
1479 
1480 			case TidyBoolean:
1481 				add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1482 				break;
1483 		}
1484 	}
1485 
1486 	return;
1487 }
1488 /* }}} */
1489 
1490 /* {{{ proto int tidy_get_status(tidy tidy)
1491    Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1492 static PHP_FUNCTION(tidy_get_status)
1493 {
1494 	TIDY_FETCH_OBJECT;
1495 
1496 	RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1497 }
1498 /* }}} */
1499 
1500 /* {{{ proto int tidy_get_html_ver(tidy tidy)
1501    Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1502 static PHP_FUNCTION(tidy_get_html_ver)
1503 {
1504 	TIDY_FETCH_INITIALIZED_OBJECT;
1505 
1506 	RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1507 }
1508 /* }}} */
1509 
1510 /* {{{ proto bool tidy_is_xhtml(tidy tidy)
1511    Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1512 static PHP_FUNCTION(tidy_is_xhtml)
1513 {
1514 	TIDY_FETCH_INITIALIZED_OBJECT;
1515 
1516 	RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1517 }
1518 /* }}} */
1519 
1520 /* {{{ proto bool tidy_is_xml(tidy tidy)
1521    Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1522 static PHP_FUNCTION(tidy_is_xml)
1523 {
1524 	TIDY_FETCH_INITIALIZED_OBJECT;
1525 
1526 	RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1527 }
1528 /* }}} */
1529 
1530 /* {{{ proto int tidy_error_count(tidy tidy)
1531    Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1532 static PHP_FUNCTION(tidy_error_count)
1533 {
1534 	TIDY_FETCH_OBJECT;
1535 
1536 	RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1537 }
1538 /* }}} */
1539 
1540 /* {{{ proto int tidy_warning_count(tidy tidy)
1541    Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1542 static PHP_FUNCTION(tidy_warning_count)
1543 {
1544 	TIDY_FETCH_OBJECT;
1545 
1546 	RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1547 }
1548 /* }}} */
1549 
1550 /* {{{ proto int tidy_access_count(tidy tidy)
1551    Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1552 static PHP_FUNCTION(tidy_access_count)
1553 {
1554 	TIDY_FETCH_OBJECT;
1555 
1556 	RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1557 }
1558 /* }}} */
1559 
1560 /* {{{ proto int tidy_config_count(tidy tidy)
1561    Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1562 static PHP_FUNCTION(tidy_config_count)
1563 {
1564 	TIDY_FETCH_OBJECT;
1565 
1566 	RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1567 }
1568 /* }}} */
1569 
1570 /* {{{ proto mixed tidy_getopt(string option)
1571    Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1572 static PHP_FUNCTION(tidy_getopt)
1573 {
1574 	PHPTidyObj *obj;
1575 	char *optname;
1576 	void *optval;
1577 	size_t optname_len;
1578 	TidyOption opt;
1579 	TidyOptionType optt;
1580 
1581 	TIDY_SET_CONTEXT;
1582 
1583 	if (object) {
1584 		if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &optname, &optname_len) == FAILURE) {
1585 			RETURN_FALSE;
1586 		}
1587 	} else {
1588 		if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1589 			RETURN_FALSE;
1590 		}
1591 	}
1592 
1593 	obj = Z_TIDY_P(object);
1594 
1595 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1596 
1597 	if (!opt) {
1598 		php_error_docref(NULL, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
1599 		RETURN_FALSE;
1600 	}
1601 
1602 	optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1603 	switch (optt) {
1604 		case TidyString:
1605 			RETVAL_STR((zend_string*)optval);
1606 			return;
1607 
1608 		case TidyInteger:
1609 			RETURN_LONG((zend_long)optval);
1610 			break;
1611 
1612 		case TidyBoolean:
1613 			if (optval) {
1614 				RETURN_TRUE;
1615 			} else {
1616 				RETURN_FALSE;
1617 			}
1618 			break;
1619 
1620 		default:
1621 			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1622 			break;
1623 	}
1624 
1625 	RETURN_FALSE;
1626 }
1627 /* }}} */
1628 
TIDY_DOC_METHOD(__construct)1629 static TIDY_DOC_METHOD(__construct)
1630 {
1631 	char *enc = NULL;
1632 	size_t enc_len = 0;
1633 	zend_bool use_include_path = 0;
1634 	zval *options = NULL;
1635 	zend_string *contents, *inputfile = NULL;
1636 
1637 	PHPTidyObj *obj;
1638 	TIDY_SET_CONTEXT;
1639 
1640 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|Pzsb", &inputfile,
1641 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1642 		RETURN_FALSE;
1643 	}
1644 
1645 	obj = Z_TIDY_P(object);
1646 
1647 	if (inputfile) {
1648 		if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1649 			php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
1650 			return;
1651 		}
1652 
1653 		if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1654 			php_error_docref(NULL, E_WARNING, "Input string is too long");
1655 			RETURN_FALSE;
1656 		}
1657 
1658 		TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1659 
1660 		php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1661 
1662 		zend_string_release_ex(contents, 0);
1663 	}
1664 }
1665 
TIDY_DOC_METHOD(parseFile)1666 static TIDY_DOC_METHOD(parseFile)
1667 {
1668 	char *enc = NULL;
1669 	size_t enc_len = 0;
1670 	zend_bool use_include_path = 0;
1671 	zval *options = NULL;
1672 	zend_string *inputfile, *contents;
1673 	PHPTidyObj *obj;
1674 
1675 	TIDY_SET_CONTEXT;
1676 
1677 	obj = Z_TIDY_P(object);
1678 
1679 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &inputfile,
1680 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1681 		RETURN_FALSE;
1682 	}
1683 
1684 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1685 		php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
1686 		RETURN_FALSE;
1687 	}
1688 
1689 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1690 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1691 		RETURN_FALSE;
1692 	}
1693 
1694 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1695 
1696 	if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1697 		RETVAL_FALSE;
1698 	} else {
1699 		RETVAL_TRUE;
1700 	}
1701 
1702 	zend_string_release_ex(contents, 0);
1703 }
1704 
TIDY_DOC_METHOD(parseString)1705 static TIDY_DOC_METHOD(parseString)
1706 {
1707 	char *enc = NULL;
1708 	size_t enc_len = 0;
1709 	zval *options = NULL;
1710 	PHPTidyObj *obj;
1711 	zend_string *input;
1712 
1713 	TIDY_SET_CONTEXT;
1714 
1715 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zs", &input, &options, &enc, &enc_len) == FAILURE) {
1716 		RETURN_FALSE;
1717 	}
1718 
1719 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1720 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1721 		RETURN_FALSE;
1722 	}
1723 
1724 	obj = Z_TIDY_P(object);
1725 
1726 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1727 
1728 	if(php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1729 		RETURN_TRUE;
1730 	}
1731 
1732 	RETURN_FALSE;
1733 }
1734 
1735 
1736 /* {{{ proto TidyNode tidy_get_root()
1737    Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1738 static PHP_FUNCTION(tidy_get_root)
1739 {
1740 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1741 }
1742 /* }}} */
1743 
1744 /* {{{ proto TidyNode tidy_get_html()
1745    Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1746 static PHP_FUNCTION(tidy_get_html)
1747 {
1748 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1749 }
1750 /* }}} */
1751 
1752 /* {{{ proto TidyNode tidy_get_head()
1753    Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1754 static PHP_FUNCTION(tidy_get_head)
1755 {
1756 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1757 }
1758 /* }}} */
1759 
1760 /* {{{ proto TidyNode tidy_get_body(tidy tidy)
1761    Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1762 static PHP_FUNCTION(tidy_get_body)
1763 {
1764 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1765 }
1766 /* }}} */
1767 
1768 /* {{{ proto bool tidyNode::hasChildren()
1769    Returns true if this node has children */
TIDY_NODE_METHOD(hasChildren)1770 static TIDY_NODE_METHOD(hasChildren)
1771 {
1772 	TIDY_FETCH_ONLY_OBJECT;
1773 
1774 	if (tidyGetChild(obj->node)) {
1775 		RETURN_TRUE;
1776 	} else {
1777 		RETURN_FALSE;
1778 	}
1779 }
1780 /* }}} */
1781 
1782 /* {{{ proto bool tidyNode::hasSiblings()
1783    Returns true if this node has siblings */
TIDY_NODE_METHOD(hasSiblings)1784 static TIDY_NODE_METHOD(hasSiblings)
1785 {
1786 	TIDY_FETCH_ONLY_OBJECT;
1787 
1788 	if (obj->node && tidyGetNext(obj->node)) {
1789 		RETURN_TRUE;
1790 	} else {
1791 		RETURN_FALSE;
1792 	}
1793 }
1794 /* }}} */
1795 
1796 /* {{{ proto bool tidyNode::isComment()
1797    Returns true if this node represents a comment */
TIDY_NODE_METHOD(isComment)1798 static TIDY_NODE_METHOD(isComment)
1799 {
1800 	TIDY_FETCH_ONLY_OBJECT;
1801 
1802 	if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1803 		RETURN_TRUE;
1804 	} else {
1805 		RETURN_FALSE;
1806 	}
1807 }
1808 /* }}} */
1809 
1810 /* {{{ proto bool tidyNode::isHtml()
1811    Returns true if this node is part of a HTML document */
TIDY_NODE_METHOD(isHtml)1812 static TIDY_NODE_METHOD(isHtml)
1813 {
1814 	TIDY_FETCH_ONLY_OBJECT;
1815 
1816 	switch (tidyNodeGetType(obj->node)) {
1817 		case TidyNode_Start:
1818 		case TidyNode_End:
1819 		case TidyNode_StartEnd:
1820 			RETURN_TRUE;
1821 		default:
1822 			RETURN_FALSE;
1823 	}
1824 }
1825 /* }}} */
1826 
1827 /* {{{ proto bool tidyNode::isText()
1828    Returns true if this node represents text (no markup) */
TIDY_NODE_METHOD(isText)1829 static TIDY_NODE_METHOD(isText)
1830 {
1831 	TIDY_FETCH_ONLY_OBJECT;
1832 
1833 	if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1834 		RETURN_TRUE;
1835 	} else {
1836 		RETURN_FALSE;
1837 	}
1838 }
1839 /* }}} */
1840 
1841 /* {{{ proto bool tidyNode::isJste()
1842    Returns true if this node is JSTE */
TIDY_NODE_METHOD(isJste)1843 static TIDY_NODE_METHOD(isJste)
1844 {
1845 	TIDY_FETCH_ONLY_OBJECT;
1846 
1847 	if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1848 		RETURN_TRUE;
1849 	} else {
1850 		RETURN_FALSE;
1851 	}
1852 }
1853 /* }}} */
1854 
1855 /* {{{ proto bool tidyNode::isAsp()
1856    Returns true if this node is ASP */
TIDY_NODE_METHOD(isAsp)1857 static TIDY_NODE_METHOD(isAsp)
1858 {
1859 	TIDY_FETCH_ONLY_OBJECT;
1860 
1861 	if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1862 		RETURN_TRUE;
1863 	} else {
1864 		RETURN_FALSE;
1865 	}
1866 }
1867 /* }}} */
1868 
1869 /* {{{ proto bool tidyNode::isPhp()
1870    Returns true if this node is PHP */
TIDY_NODE_METHOD(isPhp)1871 static TIDY_NODE_METHOD(isPhp)
1872 {
1873 	TIDY_FETCH_ONLY_OBJECT;
1874 
1875 	if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1876 		RETURN_TRUE;
1877 	} else {
1878 		RETURN_FALSE;
1879 	}
1880 }
1881 /* }}} */
1882 
1883 /* {{{ proto tidyNode tidyNode::getParent()
1884    Returns the parent node if available or NULL */
TIDY_NODE_METHOD(getParent)1885 static TIDY_NODE_METHOD(getParent)
1886 {
1887 	TidyNode	parent_node;
1888 	PHPTidyObj *newobj;
1889 	TIDY_FETCH_ONLY_OBJECT;
1890 
1891 	parent_node = tidyGetParent(obj->node);
1892 	if(parent_node) {
1893 		tidy_instanciate(tidy_ce_node, return_value);
1894 		newobj = Z_TIDY_P(return_value);
1895 		newobj->node = parent_node;
1896 		newobj->type = is_node;
1897 		newobj->ptdoc = obj->ptdoc;
1898 		newobj->ptdoc->ref_count++;
1899 		tidy_add_default_properties(newobj, is_node);
1900 	} else {
1901 		ZVAL_NULL(return_value);
1902 	}
1903 }
1904 /* }}} */
1905 
1906 
1907 /* {{{ proto tidyNode::__construct()
1908          __constructor for tidyNode. */
TIDY_NODE_METHOD(__construct)1909 static TIDY_NODE_METHOD(__construct)
1910 {
1911 	zend_throw_error(NULL, "You should not create a tidyNode manually");
1912 }
1913 /* }}} */
1914 
_php_tidy_register_nodetypes(INIT_FUNC_ARGS)1915 static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
1916 {
1917 	TIDY_NODE_CONST(ROOT, Root);
1918 	TIDY_NODE_CONST(DOCTYPE, DocType);
1919 	TIDY_NODE_CONST(COMMENT, Comment);
1920 	TIDY_NODE_CONST(PROCINS, ProcIns);
1921 	TIDY_NODE_CONST(TEXT, Text);
1922 	TIDY_NODE_CONST(START, Start);
1923 	TIDY_NODE_CONST(END, End);
1924 	TIDY_NODE_CONST(STARTEND, StartEnd);
1925 	TIDY_NODE_CONST(CDATA, CDATA);
1926 	TIDY_NODE_CONST(SECTION, Section);
1927 	TIDY_NODE_CONST(ASP, Asp);
1928 	TIDY_NODE_CONST(JSTE, Jste);
1929 	TIDY_NODE_CONST(PHP, Php);
1930 	TIDY_NODE_CONST(XMLDECL, XmlDecl);
1931 }
1932 
_php_tidy_register_tags(INIT_FUNC_ARGS)1933 static void _php_tidy_register_tags(INIT_FUNC_ARGS)
1934 {
1935 	TIDY_TAG_CONST(UNKNOWN);
1936 	TIDY_TAG_CONST(A);
1937 	TIDY_TAG_CONST(ABBR);
1938 	TIDY_TAG_CONST(ACRONYM);
1939 	TIDY_TAG_CONST(ADDRESS);
1940 	TIDY_TAG_CONST(ALIGN);
1941 	TIDY_TAG_CONST(APPLET);
1942 	TIDY_TAG_CONST(AREA);
1943 	TIDY_TAG_CONST(B);
1944 	TIDY_TAG_CONST(BASE);
1945 	TIDY_TAG_CONST(BASEFONT);
1946 	TIDY_TAG_CONST(BDO);
1947 	TIDY_TAG_CONST(BGSOUND);
1948 	TIDY_TAG_CONST(BIG);
1949 	TIDY_TAG_CONST(BLINK);
1950 	TIDY_TAG_CONST(BLOCKQUOTE);
1951 	TIDY_TAG_CONST(BODY);
1952 	TIDY_TAG_CONST(BR);
1953 	TIDY_TAG_CONST(BUTTON);
1954 	TIDY_TAG_CONST(CAPTION);
1955 	TIDY_TAG_CONST(CENTER);
1956 	TIDY_TAG_CONST(CITE);
1957 	TIDY_TAG_CONST(CODE);
1958 	TIDY_TAG_CONST(COL);
1959 	TIDY_TAG_CONST(COLGROUP);
1960 	TIDY_TAG_CONST(COMMENT);
1961 	TIDY_TAG_CONST(DD);
1962 	TIDY_TAG_CONST(DEL);
1963 	TIDY_TAG_CONST(DFN);
1964 	TIDY_TAG_CONST(DIR);
1965 	TIDY_TAG_CONST(DIV);
1966 	TIDY_TAG_CONST(DL);
1967 	TIDY_TAG_CONST(DT);
1968 	TIDY_TAG_CONST(EM);
1969 	TIDY_TAG_CONST(EMBED);
1970 	TIDY_TAG_CONST(FIELDSET);
1971 	TIDY_TAG_CONST(FONT);
1972 	TIDY_TAG_CONST(FORM);
1973 	TIDY_TAG_CONST(FRAME);
1974 	TIDY_TAG_CONST(FRAMESET);
1975 	TIDY_TAG_CONST(H1);
1976 	TIDY_TAG_CONST(H2);
1977 	TIDY_TAG_CONST(H3);
1978 	TIDY_TAG_CONST(H4);
1979 	TIDY_TAG_CONST(H5);
1980 	TIDY_TAG_CONST(H6);
1981 	TIDY_TAG_CONST(HEAD);
1982 	TIDY_TAG_CONST(HR);
1983 	TIDY_TAG_CONST(HTML);
1984 	TIDY_TAG_CONST(I);
1985 	TIDY_TAG_CONST(IFRAME);
1986 	TIDY_TAG_CONST(ILAYER);
1987 	TIDY_TAG_CONST(IMG);
1988 	TIDY_TAG_CONST(INPUT);
1989 	TIDY_TAG_CONST(INS);
1990 	TIDY_TAG_CONST(ISINDEX);
1991 	TIDY_TAG_CONST(KBD);
1992 	TIDY_TAG_CONST(KEYGEN);
1993 	TIDY_TAG_CONST(LABEL);
1994 	TIDY_TAG_CONST(LAYER);
1995 	TIDY_TAG_CONST(LEGEND);
1996 	TIDY_TAG_CONST(LI);
1997 	TIDY_TAG_CONST(LINK);
1998 	TIDY_TAG_CONST(LISTING);
1999 	TIDY_TAG_CONST(MAP);
2000 	TIDY_TAG_CONST(MARQUEE);
2001 	TIDY_TAG_CONST(MENU);
2002 	TIDY_TAG_CONST(META);
2003 	TIDY_TAG_CONST(MULTICOL);
2004 	TIDY_TAG_CONST(NOBR);
2005 	TIDY_TAG_CONST(NOEMBED);
2006 	TIDY_TAG_CONST(NOFRAMES);
2007 	TIDY_TAG_CONST(NOLAYER);
2008 	TIDY_TAG_CONST(NOSAVE);
2009 	TIDY_TAG_CONST(NOSCRIPT);
2010 	TIDY_TAG_CONST(OBJECT);
2011 	TIDY_TAG_CONST(OL);
2012 	TIDY_TAG_CONST(OPTGROUP);
2013 	TIDY_TAG_CONST(OPTION);
2014 	TIDY_TAG_CONST(P);
2015 	TIDY_TAG_CONST(PARAM);
2016 	TIDY_TAG_CONST(PLAINTEXT);
2017 	TIDY_TAG_CONST(PRE);
2018 	TIDY_TAG_CONST(Q);
2019 	TIDY_TAG_CONST(RB);
2020 	TIDY_TAG_CONST(RBC);
2021 	TIDY_TAG_CONST(RP);
2022 	TIDY_TAG_CONST(RT);
2023 	TIDY_TAG_CONST(RTC);
2024 	TIDY_TAG_CONST(RUBY);
2025 	TIDY_TAG_CONST(S);
2026 	TIDY_TAG_CONST(SAMP);
2027 	TIDY_TAG_CONST(SCRIPT);
2028 	TIDY_TAG_CONST(SELECT);
2029 	TIDY_TAG_CONST(SERVER);
2030 	TIDY_TAG_CONST(SERVLET);
2031 	TIDY_TAG_CONST(SMALL);
2032 	TIDY_TAG_CONST(SPACER);
2033 	TIDY_TAG_CONST(SPAN);
2034 	TIDY_TAG_CONST(STRIKE);
2035 	TIDY_TAG_CONST(STRONG);
2036 	TIDY_TAG_CONST(STYLE);
2037 	TIDY_TAG_CONST(SUB);
2038 	TIDY_TAG_CONST(SUP);
2039 	TIDY_TAG_CONST(TABLE);
2040 	TIDY_TAG_CONST(TBODY);
2041 	TIDY_TAG_CONST(TD);
2042 	TIDY_TAG_CONST(TEXTAREA);
2043 	TIDY_TAG_CONST(TFOOT);
2044 	TIDY_TAG_CONST(TH);
2045 	TIDY_TAG_CONST(THEAD);
2046 	TIDY_TAG_CONST(TITLE);
2047 	TIDY_TAG_CONST(TR);
2048 	TIDY_TAG_CONST(TT);
2049 	TIDY_TAG_CONST(U);
2050 	TIDY_TAG_CONST(UL);
2051 	TIDY_TAG_CONST(VAR);
2052 	TIDY_TAG_CONST(WBR);
2053 	TIDY_TAG_CONST(XMP);
2054 # if HAVE_TIDYBUFFIO_H
2055 	TIDY_TAG_CONST(ARTICLE);
2056 	TIDY_TAG_CONST(ASIDE);
2057 	TIDY_TAG_CONST(AUDIO);
2058 	TIDY_TAG_CONST(BDI);
2059 	TIDY_TAG_CONST(CANVAS);
2060 	TIDY_TAG_CONST(COMMAND);
2061 	TIDY_TAG_CONST(DATALIST);
2062 	TIDY_TAG_CONST(DETAILS);
2063 	TIDY_TAG_CONST(DIALOG);
2064 	TIDY_TAG_CONST(FIGCAPTION);
2065 	TIDY_TAG_CONST(FIGURE);
2066 	TIDY_TAG_CONST(FOOTER);
2067 	TIDY_TAG_CONST(HEADER);
2068 	TIDY_TAG_CONST(HGROUP);
2069 	TIDY_TAG_CONST(MAIN);
2070 	TIDY_TAG_CONST(MARK);
2071 	TIDY_TAG_CONST(MENUITEM);
2072 	TIDY_TAG_CONST(METER);
2073 	TIDY_TAG_CONST(NAV);
2074 	TIDY_TAG_CONST(OUTPUT);
2075 	TIDY_TAG_CONST(PROGRESS);
2076 	TIDY_TAG_CONST(SECTION);
2077 	TIDY_TAG_CONST(SOURCE);
2078 	TIDY_TAG_CONST(SUMMARY);
2079 	TIDY_TAG_CONST(TEMPLATE);
2080 	TIDY_TAG_CONST(TIME);
2081 	TIDY_TAG_CONST(TRACK);
2082 	TIDY_TAG_CONST(VIDEO);
2083 # endif
2084 }
2085 
2086 #endif
2087