xref: /PHP-7.3/ext/tidy/tidy.c (revision d4bf0799)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2018 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: John Coggeshall <john@php.net>                               |
16   +----------------------------------------------------------------------+
17 */
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include "php.h"
24 #include "php_tidy.h"
25 
26 #if HAVE_TIDY
27 
28 #include "php_ini.h"
29 #include "ext/standard/info.h"
30 
31 #if HAVE_TIDY_H
32 #include "tidy.h"
33 #elif HAVE_TIDYP_H
34 #include "tidyp.h"
35 #endif
36 
37 #if HAVE_TIDYBUFFIO_H
38 #include "tidybuffio.h"
39 #else
40 #include "buffio.h"
41 #endif
42 
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47 
48 /* {{{ ext/tidy macros
49 */
50 #define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)
51 
52 #define TIDY_SET_CONTEXT \
53     zval *object = getThis();
54 
55 #define TIDY_FETCH_OBJECT	\
56 	PHPTidyObj *obj;	\
57 	TIDY_SET_CONTEXT; \
58 	if (object) {	\
59 		if (zend_parse_parameters_none() == FAILURE) {	\
60 			return;	\
61 		}	\
62 	} else {	\
63 		if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "O", &object, tidy_ce_doc) == FAILURE) {	\
64 			RETURN_FALSE;	\
65 		}	\
66 	}	\
67 	obj = Z_TIDY_P(object);	\
68 
69 #define TIDY_FETCH_INITIALIZED_OBJECT \
70 	TIDY_FETCH_OBJECT; \
71 	if (!obj->ptdoc->initialized) { \
72 		zend_throw_error(NULL, "tidy object is not initialized"); \
73 		return; \
74 	}
75 
76 #define TIDY_FETCH_ONLY_OBJECT	\
77 	PHPTidyObj *obj;	\
78 	TIDY_SET_CONTEXT; \
79 	if (zend_parse_parameters_none() == FAILURE) {	\
80 		return;	\
81 	}	\
82 	obj = Z_TIDY_P(object);	\
83 
84 #define TIDY_APPLY_CONFIG_ZVAL(_doc, _val) \
85     if(_val) { \
86         if(Z_TYPE_P(_val) == IS_ARRAY) { \
87             _php_tidy_apply_config_array(_doc, Z_ARRVAL_P(_val)); \
88         } else { \
89             convert_to_string_ex(_val); \
90             TIDY_OPEN_BASE_DIR_CHECK(Z_STRVAL_P(_val)); \
91             switch (tidyLoadConfig(_doc, Z_STRVAL_P(_val))) { \
92               case -1: \
93                 php_error_docref(NULL, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_P(_val)); \
94                 break; \
95               case 1: \
96                 php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_P(_val)); \
97                 break; \
98             } \
99         } \
100     }
101 
102 #define REGISTER_TIDY_CLASS(classname, name, parent, __flags) \
103 	{ \
104 		zend_class_entry ce; \
105 		INIT_CLASS_ENTRY(ce, # classname, tidy_funcs_ ## name); \
106 		ce.create_object = tidy_object_new_ ## name; \
107 		tidy_ce_ ## name = zend_register_internal_class_ex(&ce, parent); \
108 		tidy_ce_ ## name->ce_flags |= __flags;  \
109 		memcpy(&tidy_object_handlers_ ## name, &std_object_handlers, sizeof(zend_object_handlers)); \
110 		tidy_object_handlers_ ## name.clone_obj = NULL; \
111 	}
112 
113 #define TIDY_TAG_CONST(tag) REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)
114 #define TIDY_NODE_CONST(name, type) REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)
115 
116 #ifndef TRUE
117 #define TRUE 1
118 #endif
119 
120 #ifndef FALSE
121 #define FALSE 0
122 #endif
123 
124 #define ADD_PROPERTY_STRING(_table, _key, _string) \
125 	{ \
126 		zval tmp; \
127 		if (_string) { \
128 			ZVAL_STRING(&tmp, (char *)_string); \
129 		} else { \
130 			ZVAL_EMPTY_STRING(&tmp); \
131 		} \
132 		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
133 	}
134 
135 #define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
136    { \
137        zval tmp; \
138        if (_string) { \
139            ZVAL_STRINGL(&tmp, (char *)_string, _len); \
140        } else { \
141            ZVAL_EMPTY_STRING(&tmp); \
142        } \
143        zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
144    }
145 
146 #define ADD_PROPERTY_LONG(_table, _key, _long) \
147 	{ \
148 		zval tmp; \
149 		ZVAL_LONG(&tmp, _long); \
150 		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
151 	}
152 
153 #define ADD_PROPERTY_NULL(_table, _key) \
154 	{ \
155 		zval tmp; \
156 		ZVAL_NULL(&tmp); \
157 		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
158 	}
159 
160 #define ADD_PROPERTY_BOOL(_table, _key, _bool) \
161     { \
162 		zval tmp; \
163 		ZVAL_BOOL(&tmp, _bool); \
164 		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
165 	}
166 
167 #define TIDY_OPEN_BASE_DIR_CHECK(filename) \
168 if (php_check_open_basedir(filename)) { \
169 	RETURN_FALSE; \
170 } \
171 
172 #define TIDY_SET_DEFAULT_CONFIG(_doc) \
173 	if (TG(default_config) && TG(default_config)[0]) { \
174 		if (tidyLoadConfig(_doc, TG(default_config)) < 0) { \
175 			php_error_docref(NULL, E_WARNING, "Unable to load Tidy configuration file at '%s'.", TG(default_config)); \
176 		} \
177 	}
178 /* }}} */
179 
180 /* {{{ ext/tidy structs
181 */
182 typedef struct _PHPTidyDoc PHPTidyDoc;
183 typedef struct _PHPTidyObj PHPTidyObj;
184 
185 typedef enum {
186 	is_node,
187 	is_doc
188 } tidy_obj_type;
189 
190 typedef enum {
191 	is_root_node,
192 	is_html_node,
193 	is_head_node,
194 	is_body_node
195 } tidy_base_nodetypes;
196 
197 struct _PHPTidyDoc {
198 	TidyDoc			doc;
199 	TidyBuffer		*errbuf;
200 	unsigned int	ref_count;
201 	unsigned int    initialized:1;
202 };
203 
204 struct _PHPTidyObj {
205 	TidyNode		node;
206 	tidy_obj_type	type;
207 	PHPTidyDoc		*ptdoc;
208 	zend_object		std;
209 };
210 
php_tidy_fetch_object(zend_object * obj)211 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
212 	return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
213 }
214 
215 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
216 /* }}} */
217 
218 /* {{{ ext/tidy prototypes
219 */
220 static zend_string *php_tidy_file_to_mem(char *, zend_bool);
221 static void tidy_object_free_storage(zend_object *);
222 static zend_object *tidy_object_new_node(zend_class_entry *);
223 static zend_object *tidy_object_new_doc(zend_class_entry *);
224 static zval * tidy_instanciate(zend_class_entry *, zval *);
225 static int tidy_doc_cast_handler(zval *, zval *, int);
226 static int tidy_node_cast_handler(zval *, zval *, int);
227 static void tidy_doc_update_properties(PHPTidyObj *);
228 static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type);
229 static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
230 static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
231 static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
232 static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
233 static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
234 static void _php_tidy_register_tags(INIT_FUNC_ARGS);
235 static PHP_INI_MH(php_tidy_set_clean_output);
236 static void php_tidy_clean_output_start(const char *name, size_t name_len);
237 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
238 static int php_tidy_output_handler(void **nothing, php_output_context *output_context);
239 
240 static PHP_MINIT_FUNCTION(tidy);
241 static PHP_MSHUTDOWN_FUNCTION(tidy);
242 static PHP_RINIT_FUNCTION(tidy);
243 static PHP_MINFO_FUNCTION(tidy);
244 
245 static PHP_FUNCTION(tidy_getopt);
246 static PHP_FUNCTION(tidy_parse_string);
247 static PHP_FUNCTION(tidy_parse_file);
248 static PHP_FUNCTION(tidy_clean_repair);
249 static PHP_FUNCTION(tidy_repair_string);
250 static PHP_FUNCTION(tidy_repair_file);
251 static PHP_FUNCTION(tidy_diagnose);
252 static PHP_FUNCTION(tidy_get_output);
253 static PHP_FUNCTION(tidy_get_error_buffer);
254 static PHP_FUNCTION(tidy_get_release);
255 static PHP_FUNCTION(tidy_get_config);
256 static PHP_FUNCTION(tidy_get_status);
257 static PHP_FUNCTION(tidy_get_html_ver);
258 #if HAVE_TIDYOPTGETDOC
259 static PHP_FUNCTION(tidy_get_opt_doc);
260 #endif
261 static PHP_FUNCTION(tidy_is_xhtml);
262 static PHP_FUNCTION(tidy_is_xml);
263 static PHP_FUNCTION(tidy_error_count);
264 static PHP_FUNCTION(tidy_warning_count);
265 static PHP_FUNCTION(tidy_access_count);
266 static PHP_FUNCTION(tidy_config_count);
267 
268 static PHP_FUNCTION(tidy_get_root);
269 static PHP_FUNCTION(tidy_get_html);
270 static PHP_FUNCTION(tidy_get_head);
271 static PHP_FUNCTION(tidy_get_body);
272 
273 static TIDY_DOC_METHOD(__construct);
274 static TIDY_DOC_METHOD(parseFile);
275 static TIDY_DOC_METHOD(parseString);
276 
277 static TIDY_NODE_METHOD(hasChildren);
278 static TIDY_NODE_METHOD(hasSiblings);
279 static TIDY_NODE_METHOD(isComment);
280 static TIDY_NODE_METHOD(isHtml);
281 static TIDY_NODE_METHOD(isText);
282 static TIDY_NODE_METHOD(isJste);
283 static TIDY_NODE_METHOD(isAsp);
284 static TIDY_NODE_METHOD(isPhp);
285 static TIDY_NODE_METHOD(getParent);
286 static TIDY_NODE_METHOD(__construct);
287 /* }}} */
288 
289 ZEND_DECLARE_MODULE_GLOBALS(tidy)
290 
291 PHP_INI_BEGIN()
292 STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
293 STD_PHP_INI_ENTRY("tidy.clean_output",		"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
294 PHP_INI_END()
295 
296 /* {{{ arginfo */
297 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_string, 0, 0, 1)
298 	ZEND_ARG_INFO(0, input)
299 	ZEND_ARG_INFO(0, config_options)
300 	ZEND_ARG_INFO(0, encoding)
301 ZEND_END_ARG_INFO()
302 
303 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_error_buffer, 0, 0, 1)
304     ZEND_ARG_INFO(0, object)
305 ZEND_END_ARG_INFO()
306 
307 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_output, 0, 0, 1)
308     ZEND_ARG_INFO(0, object)
309 ZEND_END_ARG_INFO()
310 
311 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_file, 0, 0, 1)
312 	ZEND_ARG_INFO(0, file)
313 	ZEND_ARG_INFO(0, config_options)
314 	ZEND_ARG_INFO(0, encoding)
315 	ZEND_ARG_INFO(0, use_include_path)
316 ZEND_END_ARG_INFO()
317 
318 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_clean_repair, 0, 0, 1)
319     ZEND_ARG_INFO(0, object)
320 ZEND_END_ARG_INFO()
321 
322 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_string, 0, 0, 1)
323 	ZEND_ARG_INFO(0, data)
324 	ZEND_ARG_INFO(0, config_file)
325 	ZEND_ARG_INFO(0, encoding)
326 ZEND_END_ARG_INFO()
327 
328 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_file, 0, 0, 1)
329 	ZEND_ARG_INFO(0, filename)
330 	ZEND_ARG_INFO(0, config_file)
331 	ZEND_ARG_INFO(0, encoding)
332 	ZEND_ARG_INFO(0, use_include_path)
333 ZEND_END_ARG_INFO()
334 
335 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_diagnose, 0, 0, 1)
336     ZEND_ARG_INFO(0, object)
337 ZEND_END_ARG_INFO()
338 
339 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_release, 0)
340 ZEND_END_ARG_INFO()
341 
342 #if HAVE_TIDYOPTGETDOC
343 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc, 0, 0, 2)
344 	ZEND_ARG_INFO(0, resource)
345 	ZEND_ARG_INFO(0, optname)
346 ZEND_END_ARG_INFO()
347 
348 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc_method, 0, 0, 1)
349 	ZEND_ARG_INFO(0, optname)
350 ZEND_END_ARG_INFO()
351 #endif
352 
353 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_config, 0, 0, 1)
354     ZEND_ARG_INFO(0, object)
355 ZEND_END_ARG_INFO()
356 
357 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_status, 0, 0, 1)
358     ZEND_ARG_INFO(0, object)
359 ZEND_END_ARG_INFO()
360 
361 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_html_ver, 0, 0, 1)
362     ZEND_ARG_INFO(0, object)
363 ZEND_END_ARG_INFO()
364 
365 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xhtml, 0, 0, 1)
366     ZEND_ARG_INFO(0, object)
367 ZEND_END_ARG_INFO()
368 
369 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xml, 0, 0, 1)
370     ZEND_ARG_INFO(0, object)
371 ZEND_END_ARG_INFO()
372 
373 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_error_count, 0, 0, 1)
374     ZEND_ARG_INFO(0, object)
375 ZEND_END_ARG_INFO()
376 
377 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_warning_count, 0, 0, 1)
378     ZEND_ARG_INFO(0, object)
379 ZEND_END_ARG_INFO()
380 
381 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_access_count, 0, 0, 1)
382     ZEND_ARG_INFO(0, object)
383 ZEND_END_ARG_INFO()
384 
385 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_config_count, 0, 0, 1)
386     ZEND_ARG_INFO(0, object)
387 ZEND_END_ARG_INFO()
388 
389 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt, 0, 0, 1)
390 	ZEND_ARG_INFO(0, option)
391 ZEND_END_ARG_INFO()
392 
393 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_root, 0)
394 ZEND_END_ARG_INFO()
395 
396 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_html, 0)
397 ZEND_END_ARG_INFO()
398 
399 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_head, 0)
400 ZEND_END_ARG_INFO()
401 
402 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_body, 0, 0, 1)
403 	ZEND_ARG_INFO(0, tidy)
404 ZEND_END_ARG_INFO()
405 
406 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_construct, 0, 0, 0)
407     ZEND_ARG_INFO(0, filename)
408     ZEND_ARG_INFO(0, config_file)
409     ZEND_ARG_INFO(0, encoding)
410     ZEND_ARG_INFO(0, use_include_path)
411 ZEND_END_ARG_INFO()
412 /* }}} */
413 
414 static const zend_function_entry tidy_functions[] = {
415 	PHP_FE(tidy_getopt,             arginfo_tidy_getopt)
416 	PHP_FE(tidy_parse_string,       arginfo_tidy_parse_string)
417 	PHP_FE(tidy_parse_file,         arginfo_tidy_parse_file)
418 	PHP_FE(tidy_get_output,         arginfo_tidy_get_output)
419 	PHP_FE(tidy_get_error_buffer,   arginfo_tidy_get_error_buffer)
420 	PHP_FE(tidy_clean_repair,       arginfo_tidy_clean_repair)
421 	PHP_FE(tidy_repair_string,	arginfo_tidy_repair_string)
422 	PHP_FE(tidy_repair_file,	arginfo_tidy_repair_file)
423 	PHP_FE(tidy_diagnose,           arginfo_tidy_diagnose)
424 	PHP_FE(tidy_get_release,	arginfo_tidy_get_release)
425 	PHP_FE(tidy_get_config,		arginfo_tidy_get_config)
426 	PHP_FE(tidy_get_status,		arginfo_tidy_get_status)
427 	PHP_FE(tidy_get_html_ver,	arginfo_tidy_get_html_ver)
428 	PHP_FE(tidy_is_xhtml,		arginfo_tidy_is_xhtml)
429 	PHP_FE(tidy_is_xml,		arginfo_tidy_is_xml)
430 	PHP_FE(tidy_error_count,	arginfo_tidy_error_count)
431 	PHP_FE(tidy_warning_count,	arginfo_tidy_warning_count)
432 	PHP_FE(tidy_access_count,	arginfo_tidy_access_count)
433 	PHP_FE(tidy_config_count,	arginfo_tidy_config_count)
434 #if HAVE_TIDYOPTGETDOC
435 	PHP_FE(tidy_get_opt_doc,	arginfo_tidy_get_opt_doc)
436 #endif
437 	PHP_FE(tidy_get_root,		arginfo_tidy_get_root)
438 	PHP_FE(tidy_get_head,		arginfo_tidy_get_head)
439 	PHP_FE(tidy_get_html,		arginfo_tidy_get_html)
440 	PHP_FE(tidy_get_body,		arginfo_tidy_get_body)
441 	PHP_FE_END
442 };
443 
444 static const zend_function_entry tidy_funcs_doc[] = {
445 	TIDY_METHOD_MAP(getOpt, tidy_getopt, arginfo_tidy_getopt)
446 	TIDY_METHOD_MAP(cleanRepair, tidy_clean_repair, NULL)
447 	TIDY_DOC_ME(parseFile, arginfo_tidy_parse_file)
448 	TIDY_DOC_ME(parseString, arginfo_tidy_parse_string)
449 	TIDY_METHOD_MAP(repairString, tidy_repair_string, arginfo_tidy_repair_string)
450 	TIDY_METHOD_MAP(repairFile, tidy_repair_file, arginfo_tidy_repair_file)
451 	TIDY_METHOD_MAP(diagnose, tidy_diagnose, NULL)
452 	TIDY_METHOD_MAP(getRelease, tidy_get_release, NULL)
453 	TIDY_METHOD_MAP(getConfig, tidy_get_config, NULL)
454 	TIDY_METHOD_MAP(getStatus, tidy_get_status, NULL)
455 	TIDY_METHOD_MAP(getHtmlVer, tidy_get_html_ver, NULL)
456 #if HAVE_TIDYOPTGETDOC
457 	TIDY_METHOD_MAP(getOptDoc, tidy_get_opt_doc, arginfo_tidy_get_opt_doc_method)
458 #endif
459 	TIDY_METHOD_MAP(isXhtml, tidy_is_xhtml, NULL)
460 	TIDY_METHOD_MAP(isXml, tidy_is_xml, NULL)
461 	TIDY_METHOD_MAP(root, tidy_get_root, NULL)
462 	TIDY_METHOD_MAP(head, tidy_get_head, NULL)
463 	TIDY_METHOD_MAP(html, tidy_get_html, NULL)
464 	TIDY_METHOD_MAP(body, tidy_get_body, NULL)
465 	TIDY_DOC_ME(__construct, arginfo_tidy_construct)
466 	PHP_FE_END
467 };
468 
469 static const zend_function_entry tidy_funcs_node[] = {
470 	TIDY_NODE_ME(hasChildren, NULL)
471 	TIDY_NODE_ME(hasSiblings, NULL)
472 	TIDY_NODE_ME(isComment, NULL)
473 	TIDY_NODE_ME(isHtml, NULL)
474 	TIDY_NODE_ME(isText, NULL)
475 	TIDY_NODE_ME(isJste, NULL)
476 	TIDY_NODE_ME(isAsp, NULL)
477 	TIDY_NODE_ME(isPhp, NULL)
478 	TIDY_NODE_ME(getParent, NULL)
479 	TIDY_NODE_PRIVATE_ME(__construct, NULL)
480 	PHP_FE_END
481 };
482 
483 static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
484 
485 static zend_object_handlers tidy_object_handlers_doc;
486 static zend_object_handlers tidy_object_handlers_node;
487 
488 zend_module_entry tidy_module_entry = {
489 	STANDARD_MODULE_HEADER,
490 	"tidy",
491 	tidy_functions,
492 	PHP_MINIT(tidy),
493 	PHP_MSHUTDOWN(tidy),
494 	PHP_RINIT(tidy),
495 	NULL,
496 	PHP_MINFO(tidy),
497 	PHP_TIDY_VERSION,
498 	PHP_MODULE_GLOBALS(tidy),
499 	NULL,
500 	NULL,
501 	NULL,
502 	STANDARD_MODULE_PROPERTIES_EX
503 };
504 
505 #ifdef COMPILE_DL_TIDY
506 #ifdef ZTS
507 ZEND_TSRMLS_CACHE_DEFINE()
508 #endif
ZEND_GET_MODULE(tidy)509 ZEND_GET_MODULE(tidy)
510 #endif
511 
512 static void* TIDY_CALL php_tidy_malloc(size_t len)
513 {
514 	return emalloc(len);
515 }
516 
php_tidy_realloc(void * buf,size_t len)517 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
518 {
519 	return erealloc(buf, len);
520 }
521 
php_tidy_free(void * buf)522 static void TIDY_CALL php_tidy_free(void *buf)
523 {
524 	efree(buf);
525 }
526 
php_tidy_panic(ctmbstr msg)527 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
528 {
529 	php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
530 }
531 
/* Set one named tidy option on `doc` from a PHP value.
 *
 * The option is looked up by name; unknown or read-only options raise an
 * E_NOTICE. The value is converted according to the option's declared type
 * (string, integer or boolean) before being handed to libtidy.
 *
 * Returns SUCCESS when libtidy accepted the value, FAILURE otherwise. */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	TidyOption option = tidyGetOptionByName(doc, optname);

	if (option == NULL) {
		php_error_docref(NULL, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_NOTICE, "Attempting to set read-only option '%s'", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *scratch;
			zend_string *text = zval_get_tmp_string(value, &scratch);
			int accepted = tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text)) ? 1 : 0;

			/* The temporary string is released on both outcomes. */
			zend_tmp_string_release(scratch);
			if (accepted) {
				return SUCCESS;
			}
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				return SUCCESS;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				return SUCCESS;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return FAILURE;
}
579 
/* Shared implementation behind the one-shot "repair" entry points.
 *
 * PHP arguments: input (a path when `is_file` is non-zero, otherwise the
 * markup itself), optional config (array of options or a config file path),
 * optional encoding, optional use_include_path flag.
 *
 * A throw-away tidy document is created, configured, fed the input, cleaned
 * and serialised. return_value receives the repaired markup, or FALSE when
 * argument parsing, file loading, the open_basedir check on the config path,
 * parsing or clean-and-repair fails.
 *
 * Every exit after the document has been created goes through the `cleanup`
 * label so the document, the error buffer and (for file input) the loaded
 * string are always released. */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1;
	zval *config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		data = arg1;
	}

	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		/* For file input `data` is owned by this function; do not leak it. */
		if (is_file) {
			zend_string_release_ex(data, 0);
		}
		RETURN_FALSE;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		if (is_file) {
			zend_string_release_ex(data, 0);
		}
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	if (config) {
		/* Expanded by hand instead of using TIDY_APPLY_CONFIG_ZVAL: that macro
		 * returns straight out of the function when the open_basedir check
		 * fails, which would skip the cleanup below. */
		if (Z_TYPE_P(config) == IS_ARRAY) {
			_php_tidy_apply_config_array(doc, Z_ARRVAL_P(config));
		} else {
			convert_to_string_ex(config);
			if (php_check_open_basedir(Z_STRVAL_P(config))) {
				RETVAL_FALSE;
				goto cleanup;
			}
			switch (tidyLoadConfig(doc, Z_STRVAL_P(config))) {
				case -1:
					php_error_docref(NULL, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_P(config));
					break;
				case 1:
					php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_P(config));
					break;
			}
		}
	}

	if (enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
			/* NOTE(review): processing continues and the parse step below
			 * overwrites this value; kept as-is to preserve existing
			 * behaviour. */
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		tidyBufInit(&buf);
		/* `buf` only borrows the string's storage; it is never freed here. */
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

cleanup:
	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
668 
php_tidy_file_to_mem(char * filename,zend_bool use_include_path)669 static zend_string *php_tidy_file_to_mem(char *filename, zend_bool use_include_path)
670 {
671 	php_stream *stream;
672 	zend_string *data = NULL;
673 
674 	if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
675 		return NULL;
676 	}
677 	if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
678 		data = ZSTR_EMPTY_ALLOC();
679 	}
680 	php_stream_close(stream);
681 
682 	return data;
683 }
684 
/* Tear down a tidy object.
 *
 * Destroys the standard object part, then drops this object's reference on
 * the PHPTidyDoc it points to (if any). When the counter reaches zero the
 * error buffer, the tidy document and the PHPTidyDoc itself are released. */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *self = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&self->std);

	shared = self->ptdoc;
	if (shared == NULL) {
		return;
	}

	shared->ref_count--;

	/* ref_count is unsigned, so "no references left" means exactly zero. */
	if (shared->ref_count > 0) {
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
702 
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)703 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
704 {
705 	PHPTidyObj *intern;
706 
707 	intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
708 	zend_object_std_init(&intern->std, class_type);
709 	object_properties_init(&intern->std, class_type);
710 
711 	switch(objtype) {
712 		case is_node:
713 			break;
714 
715 		case is_doc:
716 			intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
717 			intern->ptdoc->doc = tidyCreate();
718 			intern->ptdoc->ref_count = 1;
719 			intern->ptdoc->initialized = 0;
720 			intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
721 			tidyBufInit(intern->ptdoc->errbuf);
722 
723 			if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
724 				tidyBufFree(intern->ptdoc->errbuf);
725 				efree(intern->ptdoc->errbuf);
726 				tidyRelease(intern->ptdoc->doc);
727 				efree(intern->ptdoc);
728 				efree(intern);
729 				php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
730 			}
731 
732 			tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
733 			tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
734 
735 			TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
736 
737 			tidy_add_default_properties(intern, is_doc);
738 			break;
739 	}
740 
741 	intern->std.handlers = handlers;
742 
743 	return &intern->std;
744 }
745 
tidy_object_new_node(zend_class_entry * class_type)746 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
747 {
748 	return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
749 }
750 
tidy_object_new_doc(zend_class_entry * class_type)751 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
752 {
753 	return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
754 }
755 
/* Initialise `object` as a new instance of class `pce` and hand the same
 * zval pointer back to the caller. */
static zval * tidy_instanciate(zend_class_entry *pce, zval *object)
{
	zval *instance = object;

	object_init_ex(instance, pce);

	return instance;
}
761 
/* Cast handler for tidy document objects.
 *
 * Integer/number casts give 0, double casts give 0.0, boolean casts give
 * true, and string casts give the document as serialised by tidySaveBuffer()
 * (with the final byte of the buffer dropped). Any other target type is
 * rejected.
 *
 * Returns SUCCESS when `out` was filled in, FAILURE otherwise. */
static int tidy_doc_cast_handler(zval *in, zval *out, int type)
{
	switch (type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			return SUCCESS;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			return SUCCESS;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			return SUCCESS;

		case IS_STRING: {
			TidyBuffer rendered;
			PHPTidyObj *self = Z_TIDY_P(in);

			tidyBufInit(&rendered);
			tidySaveBuffer (self->ptdoc->doc, &rendered);
			/* An empty buffer yields an empty string rather than size-1. */
			ZVAL_STRINGL(out, (char *) rendered.bp, rendered.size ? rendered.size-1 : 0);
			tidyBufFree(&rendered);
			return SUCCESS;
		}
	}

	/* Unsupported target type. */
	return FAILURE;
}
795 
/* Cast handler for tidy node objects.
 *
 * Integer/number casts give 0, double casts give 0.0, boolean casts give
 * true. String casts give the node text produced by tidyNodeGetText() with
 * the final byte of the buffer dropped; a node without an attached document,
 * or a node for which no text was produced, gives an empty string. Any other
 * target type is rejected.
 *
 * Returns SUCCESS when `out` was filled in, FAILURE otherwise. */
static int tidy_node_cast_handler(zval *in, zval *out, int type)
{
	TidyBuffer buf;
	PHPTidyObj *obj;

	switch(type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			break;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			break;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			break;

		case IS_STRING:
			obj = Z_TIDY_P(in);
			tidyBufInit(&buf);
			if (obj->ptdoc) {
				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
				/* Guard the size-1 computation: with an empty buffer the
				 * unsigned subtraction would wrap to a huge length. */
				if (buf.bp && buf.size) {
					ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
				} else {
					ZVAL_EMPTY_STRING(out);
				}
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&buf);
			break;

		default:
			return FAILURE;
	}

	return SUCCESS;
}
833 
/* Refresh the "value" and "errorBuffer" properties of a document object.
 *
 * "value" is set from the document as serialised by tidySaveBuffer() and
 * "errorBuffer" from the document's error buffer; in both cases the final
 * byte of the buffer is dropped. A property is only touched when the
 * corresponding buffer is non-empty, and the property table is rebuilt first
 * if the object does not have one yet. */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer rendered;
	TidyBuffer *errors = obj->ptdoc->errbuf;
	zval entry;

	tidyBufInit(&rendered);
	tidySaveBuffer (obj->ptdoc->doc, &rendered);

	if (rendered.size != 0) {
		if (obj->std.properties == NULL) {
			rebuild_object_properties(&obj->std);
		}
		ZVAL_STRINGL(&entry, (char*)rendered.bp, rendered.size-1);
		zend_hash_str_update(obj->std.properties, "value", sizeof("value") - 1, &entry);
	}

	tidyBufFree(&rendered);

	if (errors->size != 0) {
		if (obj->std.properties == NULL) {
			rebuild_object_properties(&obj->std);
		}
		ZVAL_STRINGL(&entry, (char*)errors->bp, errors->size-1);
		zend_hash_str_update(obj->std.properties, "errorBuffer", sizeof("errorBuffer") - 1, &entry);
	}
}
861 
/* Populate the property table of a tidy object.
 *
 * is_doc:  adds "errorBuffer" and "value", both NULL.
 * is_node: adds "value", "name", "type", "line", "column", "proprietary",
 *          "id" (skipped for root, doctype, text and comment nodes),
 *          "attribute" (name => value array, or NULL when the node has no
 *          attributes) and "child" (array of node objects, or NULL when the
 *          node has no children).
 *
 * Each child object shares the parent's PHPTidyDoc, bumps its reference
 * count, and is itself populated recursively. */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{
	TidyBuffer text;
	TidyAttr attr;
	TidyNode child_node;
	zval attributes, child_list, child_zv;
	PHPTidyObj *child;

	if (type == is_doc) {
		if (!obj->std.properties) {
			rebuild_object_properties(&obj->std);
		}
		ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
		ADD_PROPERTY_NULL(obj->std.properties, value);
		return;
	}

	if (type != is_node) {
		return;
	}

	if (!obj->std.properties) {
		rebuild_object_properties(&obj->std);
	}

	/* Scalar properties describing the node itself. */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
	ADD_PROPERTY_STRINGL(obj->std.properties, value, text.bp, text.size ? text.size-1 : 0);
	tidyBufFree(&text);

	ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
	ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	/* "id" is added for every node type except these four. */
	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			break;

		default:
			ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
			break;
	}

	/* Attributes: only pairs with both a name and a value are recorded. */
	attr = tidyAttrFirst(obj->node);
	if (!attr) {
		ZVAL_NULL(&attributes);
	} else {
		array_init(&attributes);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *)tidyAttrName(attr);
			char *attr_value = (char *)tidyAttrValue(attr);

			if (attr_name && attr_value) {
				add_assoc_string(&attributes, attr_name, attr_value);
			}
		}
	}
	zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attributes);

	/* Children: one node object per child, populated recursively. */
	child_node = tidyGetChild(obj->node);
	if (!child_node) {
		ZVAL_NULL(&child_list);
	} else {
		array_init(&child_list);

		for (; child_node; child_node = tidyGetNext(child_node)) {
			tidy_instanciate(tidy_ce_node, &child_zv);
			child = Z_TIDY_P(&child_zv);
			child->node = child_node;
			child->type = is_node;
			child->ptdoc = obj->ptdoc;
			child->ptdoc->ref_count++;

			tidy_add_default_properties(child, is_node);
			add_next_index_zval(&child_list, &child_zv);
		}
	}
	zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &child_list);
}
951 
/* Fetch the current value of a tidy option.
 *
 * `*type` is always set to the option's declared type. The return value is
 * encoded according to that type:
 *   TidyString  - a newly allocated zend_string (empty when libtidy reports
 *                 no value); the caller owns it.
 *   TidyInteger - the integer value carried in the pointer itself.
 *   TidyBoolean - the boolean value carried in the pointer itself.
 * Any other type yields NULL. */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	*type = tidyOptGetType(opt);

	switch (*type) {
		case TidyString: {
			char *val = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));
			if (val) {
				return (void *) zend_string_init(val, strlen(val), 0);
			} else {
				return (void *) ZSTR_EMPTY_ALLOC();
			}
		}

		case TidyInteger:
			return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));

		case TidyBoolean:
			/* Go through uintptr_t, as the integer case does, so the scalar
			 * is widened explicitly before becoming a pointer. */
			return (void *) (uintptr_t) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));

		default:
			break;
	}

	/* should not happen */
	return NULL;
}
979 
/* Shared implementation for the root/html/head/body accessors.
 *
 * Resolves the tidy object via TIDY_FETCH_OBJECT, asks libtidy for the
 * requested base node and, when one exists, fills return_value with a new
 * node object that shares the document (bumping its reference count) and has
 * its default properties populated. Returns NULL to PHP when the node type is
 * not recognised or libtidy reports no such node. */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *result;
	TidyNode found = NULL;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_root_node:
			found = tidyGetRoot(obj->ptdoc->doc);
			break;

		case is_html_node:
			found = tidyGetHtml(obj->ptdoc->doc);
			break;

		case is_head_node:
			found = tidyGetHead(obj->ptdoc->doc);
			break;

		case is_body_node:
			found = tidyGetBody(obj->ptdoc->doc);
			break;

		default:
			/* Unknown node type: `found` stays NULL and is handled below. */
			break;
	}

	if (found == NULL) {
		RETURN_NULL();
	}

	tidy_instanciate(tidy_ce_node, return_value);

	result = Z_TIDY_P(return_value);
	result->type  = is_node;
	result->ptdoc = obj->ptdoc;
	result->node  = found;
	result->ptdoc->ref_count++;

	tidy_add_default_properties(result, is_node);
}
1021 
/*
 * Apply every string-keyed entry of ht_options to the tidy document as an
 * option (name => value). Entries without a string key are ignored.
 * Always returns SUCCESS.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zend_string *key;
	zval *value;

	ZEND_HASH_FOREACH_STR_KEY_VAL(ht_options, key, value) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), value);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
1036 
/*
 * Parse `len` bytes of markup from `string` into the document owned by obj.
 *
 * When enc is non-NULL it is set as the document's character encoding first;
 * a rejected encoding raises a warning and returns FAILURE before anything
 * else happens. The document is flagged as initialized before parsing starts.
 * On a parse error the document's error buffer is emitted as a warning and
 * FAILURE is returned; otherwise the object's properties are refreshed and
 * SUCCESS is returned.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	TidyBuffer input;
	PHPTidyDoc *pdoc = obj->ptdoc;

	if (enc && tidySetCharEncoding(pdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
		return FAILURE;
	}

	pdoc->initialized = 1;

	/* The buffer only borrows the caller's memory. */
	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(pdoc->doc, &input) >= 0) {
		tidy_doc_update_properties(obj);
		return SUCCESS;
	}

	php_error_docref(NULL, E_WARNING, "%s", pdoc->errbuf->bp);
	return FAILURE;
}
1060 
PHP_MINIT_FUNCTION(tidy)1061 static PHP_MINIT_FUNCTION(tidy)
1062 {
1063 	tidySetMallocCall(php_tidy_malloc);
1064 	tidySetReallocCall(php_tidy_realloc);
1065 	tidySetFreeCall(php_tidy_free);
1066 	tidySetPanicCall(php_tidy_panic);
1067 
1068 	REGISTER_INI_ENTRIES();
1069 	REGISTER_TIDY_CLASS(tidy, doc,	NULL, 0);
1070 	REGISTER_TIDY_CLASS(tidyNode, node,	NULL, ZEND_ACC_FINAL);
1071 
1072 	tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
1073 	tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
1074 
1075 	tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
1076 	tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
1077 
1078 	_php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU);
1079 	_php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU);
1080 
1081 	php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
1082 
1083 	return SUCCESS;
1084 }
1085 
PHP_RINIT_FUNCTION(tidy)1086 static PHP_RINIT_FUNCTION(tidy)
1087 {
1088 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
1089 	ZEND_TSRMLS_CACHE_UPDATE();
1090 #endif
1091 
1092 	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
1093 
1094 	return SUCCESS;
1095 }
1096 
PHP_MSHUTDOWN_FUNCTION(tidy)1097 static PHP_MSHUTDOWN_FUNCTION(tidy)
1098 {
1099 	UNREGISTER_INI_ENTRIES();
1100 	return SUCCESS;
1101 }
1102 
/*
 * phpinfo() section: a table with the support flag and, depending on the
 * build-time feature macros, the library version and release date, followed
 * by the extension's INI entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();

	php_info_print_table_row(2, "Tidy support", "enabled");

	/* Version row: which library call is used depends on the headers found. */
#if HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
#elif HAVE_TIDYP_H
	php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
#endif

#if HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
1119 
PHP_INI_MH(php_tidy_set_clean_output)1120 static PHP_INI_MH(php_tidy_set_clean_output)
1121 {
1122 	int status;
1123 	zend_bool value;
1124 
1125 	if (ZSTR_LEN(new_value)==2 && strcasecmp("on", ZSTR_VAL(new_value))==0) {
1126 		value = (zend_bool) 1;
1127 	} else if (ZSTR_LEN(new_value)==3 && strcasecmp("yes", ZSTR_VAL(new_value))==0) {
1128 		value = (zend_bool) 1;
1129 	} else if (ZSTR_LEN(new_value)==4 && strcasecmp("true", ZSTR_VAL(new_value))==0) {
1130 		value = (zend_bool) 1;
1131 	} else {
1132 		value = (zend_bool) atoi(ZSTR_VAL(new_value));
1133 	}
1134 
1135 	if (stage == PHP_INI_STAGE_RUNTIME) {
1136 		status = php_output_get_status();
1137 
1138 		if (value && (status & PHP_OUTPUT_WRITTEN)) {
1139 			php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
1140 			return FAILURE;
1141 		}
1142 		if (status & PHP_OUTPUT_SENT) {
1143 			php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
1144 			return FAILURE;
1145 		}
1146 	}
1147 
1148 	status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
1149 
1150 	if (stage == PHP_INI_STAGE_RUNTIME && value) {
1151 		if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
1152 			php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
1153 		}
1154 	}
1155 
1156 	return status;
1157 }
1158 
1159 /*
1160  * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
1161  */
1162 
/*
 * Start the tidy output handler under the given name, but only when
 * tidy.clean_output is enabled and the handler could be created.
 */
static void php_tidy_clean_output_start(const char *name, size_t name_len)
{
	php_output_handler *handler;

	if (!TG(clean_output)) {
		return;
	}

	handler = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
	if (handler) {
		php_output_handler_start(handler);
	}
}
1171 
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)1172 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
1173 {
1174 	if (chunk_size) {
1175 		php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
1176 		return NULL;
1177 	}
1178 	if (!TG(clean_output)) {
1179 		TG(clean_output) = 1;
1180 	}
1181 	return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
1182 }
1183 
/*
 * Output-buffer callback behind "ob_tidyhandler".
 *
 * Does nothing (returns FAILURE) unless tidy.clean_output is enabled and the
 * complete output arrives in a single pass, i.e. both the START and FINAL
 * flags are set in output_context->op. Otherwise the buffered output is
 * parsed, cleaned and repaired with the default configuration; on success the
 * serialized result is handed over to output_context->out (marked to be
 * freed by the output layer) and SUCCESS is returned.
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		/*
		 * Validate the input size before allocating anything: bailing out
		 * after tidyCreate()/tidyBufInit() would leak the document and the
		 * error buffer.
		 */
		if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
			php_error_docref(NULL, E_WARNING, "Input string is too long");
			return status;
		}

		doc = tidyCreate();
		tidyBufInit(&errbuf);

		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
			tidyOptSetBool(doc, TidyForceOutput, yes);
			tidyOptSetBool(doc, TidyMark, no);

			TIDY_SET_DEFAULT_CONFIG(doc);

			/* inbuf only borrows the output layer's data; it is never freed here. */
			tidyBufInit(&inbuf);
			tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);

			if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
				tidyBufInit(&outbuf);
				tidySaveBuffer(doc, &outbuf);
				FIX_BUFFER(&outbuf);
				/* Ownership of outbuf's storage moves to the output context. */
				output_context->out.data = (char *) outbuf.bp;
				output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
				output_context->out.free = 1;
				status = SUCCESS;
			}
		}

		tidyRelease(doc);
		tidyBufFree(&errbuf);
	}

	return status;
}
1225 
1226 /* {{{ proto bool tidy_parse_string(string input [, mixed config_options [, string encoding]])
1227    Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1228 static PHP_FUNCTION(tidy_parse_string)
1229 {
1230 	char *enc = NULL;
1231 	size_t enc_len = 0;
1232 	zend_string *input;
1233 	zval *options = NULL;
1234 	PHPTidyObj *obj;
1235 
1236 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zs", &input, &options, &enc, &enc_len) == FAILURE) {
1237 		RETURN_FALSE;
1238 	}
1239 
1240 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1241 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1242 		RETURN_FALSE;
1243 	}
1244 
1245 	tidy_instanciate(tidy_ce_doc, return_value);
1246 	obj = Z_TIDY_P(return_value);
1247 
1248 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1249 
1250 	if (php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1251 		zval_ptr_dtor(return_value);
1252 		RETURN_FALSE;
1253 	}
1254 }
1255 /* }}} */
1256 
1257 /* {{{ proto string tidy_get_error_buffer()
1258    Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1259 static PHP_FUNCTION(tidy_get_error_buffer)
1260 {
1261 	TIDY_FETCH_OBJECT;
1262 
1263 	if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1264 		RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1265 	} else {
1266 		RETURN_FALSE;
1267 	}
1268 }
1269 /* }}} */
1270 
1271 /* {{{ proto string tidy_get_output(tidy tidy)
1272    Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1273 static PHP_FUNCTION(tidy_get_output)
1274 {
1275 	TidyBuffer output;
1276 	TIDY_FETCH_OBJECT;
1277 
1278 	tidyBufInit(&output);
1279 	tidySaveBuffer(obj->ptdoc->doc, &output);
1280 	FIX_BUFFER(&output);
1281 	RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1282 	tidyBufFree(&output);
1283 }
1284 /* }}} */
1285 
1286 /* {{{ proto bool tidy_parse_file(string file [, mixed config_options [, string encoding [, bool use_include_path]]])
1287    Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1288 static PHP_FUNCTION(tidy_parse_file)
1289 {
1290 	char *enc = NULL;
1291 	size_t enc_len = 0;
1292 	zend_bool use_include_path = 0;
1293 	zend_string *inputfile, *contents;
1294 	zval *options = NULL;
1295 
1296 	PHPTidyObj *obj;
1297 
1298 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &inputfile,
1299 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1300 		RETURN_FALSE;
1301 	}
1302 
1303 	tidy_instanciate(tidy_ce_doc, return_value);
1304 	obj = Z_TIDY_P(return_value);
1305 
1306 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1307 		php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
1308 		RETURN_FALSE;
1309 	}
1310 
1311 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1312 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1313 		RETURN_FALSE;
1314 	}
1315 
1316 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1317 
1318 	if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1319 		zval_ptr_dtor(return_value);
1320 		RETVAL_FALSE;
1321 	}
1322 
1323 	zend_string_release_ex(contents, 0);
1324 }
1325 /* }}} */
1326 
1327 /* {{{ proto bool tidy_clean_repair(tidy tidy)
1328    Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1329 static PHP_FUNCTION(tidy_clean_repair)
1330 {
1331 	TIDY_FETCH_OBJECT;
1332 
1333 	if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1334 		tidy_doc_update_properties(obj);
1335 		RETURN_TRUE;
1336 	}
1337 
1338 	RETURN_FALSE;
1339 }
1340 /* }}} */
1341 
1342 /* {{{ proto bool tidy_repair_string(string data [, mixed config_file [, string encoding]])
1343    Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1344 static PHP_FUNCTION(tidy_repair_string)
1345 {
1346 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, FALSE);
1347 }
1348 /* }}} */
1349 
1350 /* {{{ proto bool tidy_repair_file(string filename [, mixed config_file [, string encoding [, bool use_include_path]]])
1351    Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1352 static PHP_FUNCTION(tidy_repair_file)
1353 {
1354 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, TRUE);
1355 }
1356 /* }}} */
1357 
1358 /* {{{ proto bool tidy_diagnose()
1359    Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1360 static PHP_FUNCTION(tidy_diagnose)
1361 {
1362 	TIDY_FETCH_OBJECT;
1363 
1364 	if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1365 		tidy_doc_update_properties(obj);
1366 		RETURN_TRUE;
1367 	}
1368 
1369 	RETURN_FALSE;
1370 }
1371 /* }}} */
1372 
1373 /* {{{ proto string tidy_get_release()
1374    Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1375 static PHP_FUNCTION(tidy_get_release)
1376 {
1377 	if (zend_parse_parameters_none() == FAILURE) {
1378 		return;
1379 	}
1380 
1381 #if HAVE_TIDYRELEASEDATE
1382 	RETURN_STRING((char *)tidyReleaseDate());
1383 #else
1384 	RETURN_STRING((char *)"unknown");
1385 #endif
1386 }
1387 /* }}} */
1388 
1389 
#if HAVE_TIDYOPTGETDOC
/* {{{ proto string tidy_get_opt_doc(tidy resource, string optname)
   Returns the documentation for the given option name, or FALSE when the
   option is unknown (with a warning) or has no documentation. */
static PHP_FUNCTION(tidy_get_opt_doc)
{
	PHPTidyObj *obj;
	TidyOption opt;
	char *optname;
	size_t optname_len;
	char *doc_text;
	int parsed;

	TIDY_SET_CONTEXT;

	/* Method call: only the option name. Function call: the tidy object first. */
	if (object) {
		parsed = zend_parse_parameters(ZEND_NUM_ARGS(), "s", &optname, &optname_len);
	} else {
		parsed = zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len);
	}
	if (parsed == FAILURE) {
		RETURN_FALSE;
	}

	obj = Z_TIDY_P(object);

	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
	if (!opt) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
		RETURN_FALSE;
	}

	doc_text = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt);
	if (!doc_text) {
		RETURN_FALSE;
	}

	RETURN_STRING(doc_text);
}
/* }}} */
#endif
1429 
1430 
1431 /* {{{ proto array tidy_get_config(tidy tidy)
1432    Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1433 static PHP_FUNCTION(tidy_get_config)
1434 {
1435 	TidyIterator itOpt;
1436 	char *opt_name;
1437 	void *opt_value;
1438 	TidyOptionType optt;
1439 
1440 	TIDY_FETCH_OBJECT;
1441 
1442 	itOpt = tidyGetOptionList(obj->ptdoc->doc);
1443 
1444 	array_init(return_value);
1445 
1446 	while (itOpt) {
1447 		TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1448 
1449 		opt_name = (char *)tidyOptGetName(opt);
1450 		opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1451 		switch (optt) {
1452 			case TidyString:
1453 				add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1454 				break;
1455 
1456 			case TidyInteger:
1457 				add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1458 				break;
1459 
1460 			case TidyBoolean:
1461 				add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1462 				break;
1463 		}
1464 	}
1465 
1466 	return;
1467 }
1468 /* }}} */
1469 
1470 /* {{{ proto int tidy_get_status(tidy tidy)
1471    Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1472 static PHP_FUNCTION(tidy_get_status)
1473 {
1474 	TIDY_FETCH_OBJECT;
1475 
1476 	RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1477 }
1478 /* }}} */
1479 
1480 /* {{{ proto int tidy_get_html_ver(tidy tidy)
1481    Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1482 static PHP_FUNCTION(tidy_get_html_ver)
1483 {
1484 	TIDY_FETCH_INITIALIZED_OBJECT;
1485 
1486 	RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1487 }
1488 /* }}} */
1489 
1490 /* {{{ proto bool tidy_is_xhtml(tidy tidy)
1491    Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1492 static PHP_FUNCTION(tidy_is_xhtml)
1493 {
1494 	TIDY_FETCH_INITIALIZED_OBJECT;
1495 
1496 	RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1497 }
1498 /* }}} */
1499 
1500 /* {{{ proto bool tidy_is_xml(tidy tidy)
1501    Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1502 static PHP_FUNCTION(tidy_is_xml)
1503 {
1504 	TIDY_FETCH_INITIALIZED_OBJECT;
1505 
1506 	RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1507 }
1508 /* }}} */
1509 
1510 /* {{{ proto int tidy_error_count(tidy tidy)
1511    Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1512 static PHP_FUNCTION(tidy_error_count)
1513 {
1514 	TIDY_FETCH_OBJECT;
1515 
1516 	RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1517 }
1518 /* }}} */
1519 
1520 /* {{{ proto int tidy_warning_count(tidy tidy)
1521    Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1522 static PHP_FUNCTION(tidy_warning_count)
1523 {
1524 	TIDY_FETCH_OBJECT;
1525 
1526 	RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1527 }
1528 /* }}} */
1529 
1530 /* {{{ proto int tidy_access_count(tidy tidy)
1531    Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1532 static PHP_FUNCTION(tidy_access_count)
1533 {
1534 	TIDY_FETCH_OBJECT;
1535 
1536 	RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1537 }
1538 /* }}} */
1539 
1540 /* {{{ proto int tidy_config_count(tidy tidy)
1541    Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1542 static PHP_FUNCTION(tidy_config_count)
1543 {
1544 	TIDY_FETCH_OBJECT;
1545 
1546 	RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1547 }
1548 /* }}} */
1549 
1550 /* {{{ proto mixed tidy_getopt(string option)
1551    Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1552 static PHP_FUNCTION(tidy_getopt)
1553 {
1554 	PHPTidyObj *obj;
1555 	char *optname;
1556 	void *optval;
1557 	size_t optname_len;
1558 	TidyOption opt;
1559 	TidyOptionType optt;
1560 
1561 	TIDY_SET_CONTEXT;
1562 
1563 	if (object) {
1564 		if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &optname, &optname_len) == FAILURE) {
1565 			RETURN_FALSE;
1566 		}
1567 	} else {
1568 		if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1569 			RETURN_FALSE;
1570 		}
1571 	}
1572 
1573 	obj = Z_TIDY_P(object);
1574 
1575 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1576 
1577 	if (!opt) {
1578 		php_error_docref(NULL, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
1579 		RETURN_FALSE;
1580 	}
1581 
1582 	optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1583 	switch (optt) {
1584 		case TidyString:
1585 			RETVAL_STR((zend_string*)optval);
1586 			return;
1587 
1588 		case TidyInteger:
1589 			RETURN_LONG((zend_long)optval);
1590 			break;
1591 
1592 		case TidyBoolean:
1593 			if (optval) {
1594 				RETURN_TRUE;
1595 			} else {
1596 				RETURN_FALSE;
1597 			}
1598 			break;
1599 
1600 		default:
1601 			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1602 			break;
1603 	}
1604 
1605 	RETURN_FALSE;
1606 }
1607 /* }}} */
1608 
TIDY_DOC_METHOD(__construct)1609 static TIDY_DOC_METHOD(__construct)
1610 {
1611 	char *enc = NULL;
1612 	size_t enc_len = 0;
1613 	zend_bool use_include_path = 0;
1614 	zval *options = NULL;
1615 	zend_string *contents, *inputfile = NULL;
1616 
1617 	PHPTidyObj *obj;
1618 	TIDY_SET_CONTEXT;
1619 
1620 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|Pzsb", &inputfile,
1621 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1622 		RETURN_FALSE;
1623 	}
1624 
1625 	obj = Z_TIDY_P(object);
1626 
1627 	if (inputfile) {
1628 		if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1629 			php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
1630 			return;
1631 		}
1632 
1633 		if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1634 			php_error_docref(NULL, E_WARNING, "Input string is too long");
1635 			RETURN_FALSE;
1636 		}
1637 
1638 		TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1639 
1640 		php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1641 
1642 		zend_string_release_ex(contents, 0);
1643 	}
1644 }
1645 
TIDY_DOC_METHOD(parseFile)1646 static TIDY_DOC_METHOD(parseFile)
1647 {
1648 	char *enc = NULL;
1649 	size_t enc_len = 0;
1650 	zend_bool use_include_path = 0;
1651 	zval *options = NULL;
1652 	zend_string *inputfile, *contents;
1653 	PHPTidyObj *obj;
1654 
1655 	TIDY_SET_CONTEXT;
1656 
1657 	obj = Z_TIDY_P(object);
1658 
1659 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &inputfile,
1660 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1661 		RETURN_FALSE;
1662 	}
1663 
1664 	if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1665 		php_error_docref(NULL, E_WARNING, "Cannot Load '%s' into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (Using include path)" : "");
1666 		RETURN_FALSE;
1667 	}
1668 
1669 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1670 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1671 		RETURN_FALSE;
1672 	}
1673 
1674 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1675 
1676 	if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1677 		RETVAL_FALSE;
1678 	} else {
1679 		RETVAL_TRUE;
1680 	}
1681 
1682 	zend_string_release_ex(contents, 0);
1683 }
1684 
TIDY_DOC_METHOD(parseString)1685 static TIDY_DOC_METHOD(parseString)
1686 {
1687 	char *enc = NULL;
1688 	size_t enc_len = 0;
1689 	zval *options = NULL;
1690 	PHPTidyObj *obj;
1691 	zend_string *input;
1692 
1693 	TIDY_SET_CONTEXT;
1694 
1695 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zs", &input, &options, &enc, &enc_len) == FAILURE) {
1696 		RETURN_FALSE;
1697 	}
1698 
1699 	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1700 		php_error_docref(NULL, E_WARNING, "Input string is too long");
1701 		RETURN_FALSE;
1702 	}
1703 
1704 	obj = Z_TIDY_P(object);
1705 
1706 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1707 
1708 	if(php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1709 		RETURN_TRUE;
1710 	}
1711 
1712 	RETURN_FALSE;
1713 }
1714 
1715 
1716 /* {{{ proto TidyNode tidy_get_root()
1717    Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1718 static PHP_FUNCTION(tidy_get_root)
1719 {
1720 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1721 }
1722 /* }}} */
1723 
1724 /* {{{ proto TidyNode tidy_get_html()
1725    Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1726 static PHP_FUNCTION(tidy_get_html)
1727 {
1728 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1729 }
1730 /* }}} */
1731 
1732 /* {{{ proto TidyNode tidy_get_head()
1733    Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1734 static PHP_FUNCTION(tidy_get_head)
1735 {
1736 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1737 }
1738 /* }}} */
1739 
1740 /* {{{ proto TidyNode tidy_get_body(tidy tidy)
1741    Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1742 static PHP_FUNCTION(tidy_get_body)
1743 {
1744 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1745 }
1746 /* }}} */
1747 
1748 /* {{{ proto bool tidyNode::hasChildren()
1749    Returns true if this node has children */
TIDY_NODE_METHOD(hasChildren)1750 static TIDY_NODE_METHOD(hasChildren)
1751 {
1752 	TIDY_FETCH_ONLY_OBJECT;
1753 
1754 	if (tidyGetChild(obj->node)) {
1755 		RETURN_TRUE;
1756 	} else {
1757 		RETURN_FALSE;
1758 	}
1759 }
1760 /* }}} */
1761 
1762 /* {{{ proto bool tidyNode::hasSiblings()
1763    Returns true if this node has siblings */
TIDY_NODE_METHOD(hasSiblings)1764 static TIDY_NODE_METHOD(hasSiblings)
1765 {
1766 	TIDY_FETCH_ONLY_OBJECT;
1767 
1768 	if (obj->node && tidyGetNext(obj->node)) {
1769 		RETURN_TRUE;
1770 	} else {
1771 		RETURN_FALSE;
1772 	}
1773 }
1774 /* }}} */
1775 
1776 /* {{{ proto bool tidyNode::isComment()
1777    Returns true if this node represents a comment */
TIDY_NODE_METHOD(isComment)1778 static TIDY_NODE_METHOD(isComment)
1779 {
1780 	TIDY_FETCH_ONLY_OBJECT;
1781 
1782 	if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1783 		RETURN_TRUE;
1784 	} else {
1785 		RETURN_FALSE;
1786 	}
1787 }
1788 /* }}} */
1789 
1790 /* {{{ proto bool tidyNode::isHtml()
1791    Returns true if this node is part of a HTML document */
TIDY_NODE_METHOD(isHtml)1792 static TIDY_NODE_METHOD(isHtml)
1793 {
1794 	TIDY_FETCH_ONLY_OBJECT;
1795 
1796 	switch (tidyNodeGetType(obj->node)) {
1797 		case TidyNode_Start:
1798 		case TidyNode_End:
1799 		case TidyNode_StartEnd:
1800 			RETURN_TRUE;
1801 		default:
1802 			RETURN_FALSE;
1803 	}
1804 }
1805 /* }}} */
1806 
1807 /* {{{ proto bool tidyNode::isText()
1808    Returns true if this node represents text (no markup) */
TIDY_NODE_METHOD(isText)1809 static TIDY_NODE_METHOD(isText)
1810 {
1811 	TIDY_FETCH_ONLY_OBJECT;
1812 
1813 	if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1814 		RETURN_TRUE;
1815 	} else {
1816 		RETURN_FALSE;
1817 	}
1818 }
1819 /* }}} */
1820 
1821 /* {{{ proto bool tidyNode::isJste()
1822    Returns true if this node is JSTE */
TIDY_NODE_METHOD(isJste)1823 static TIDY_NODE_METHOD(isJste)
1824 {
1825 	TIDY_FETCH_ONLY_OBJECT;
1826 
1827 	if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1828 		RETURN_TRUE;
1829 	} else {
1830 		RETURN_FALSE;
1831 	}
1832 }
1833 /* }}} */
1834 
1835 /* {{{ proto bool tidyNode::isAsp()
1836    Returns true if this node is ASP */
TIDY_NODE_METHOD(isAsp)1837 static TIDY_NODE_METHOD(isAsp)
1838 {
1839 	TIDY_FETCH_ONLY_OBJECT;
1840 
1841 	if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1842 		RETURN_TRUE;
1843 	} else {
1844 		RETURN_FALSE;
1845 	}
1846 }
1847 /* }}} */
1848 
1849 /* {{{ proto bool tidyNode::isPhp()
1850    Returns true if this node is PHP */
TIDY_NODE_METHOD(isPhp)1851 static TIDY_NODE_METHOD(isPhp)
1852 {
1853 	TIDY_FETCH_ONLY_OBJECT;
1854 
1855 	if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1856 		RETURN_TRUE;
1857 	} else {
1858 		RETURN_FALSE;
1859 	}
1860 }
1861 /* }}} */
1862 
1863 /* {{{ proto tidyNode tidyNode::getParent()
1864    Returns the parent node if available or NULL */
TIDY_NODE_METHOD(getParent)1865 static TIDY_NODE_METHOD(getParent)
1866 {
1867 	TidyNode	parent_node;
1868 	PHPTidyObj *newobj;
1869 	TIDY_FETCH_ONLY_OBJECT;
1870 
1871 	parent_node = tidyGetParent(obj->node);
1872 	if(parent_node) {
1873 		tidy_instanciate(tidy_ce_node, return_value);
1874 		newobj = Z_TIDY_P(return_value);
1875 		newobj->node = parent_node;
1876 		newobj->type = is_node;
1877 		newobj->ptdoc = obj->ptdoc;
1878 		newobj->ptdoc->ref_count++;
1879 		tidy_add_default_properties(newobj, is_node);
1880 	} else {
1881 		ZVAL_NULL(return_value);
1882 	}
1883 }
1884 /* }}} */
1885 
1886 
1887 /* {{{ proto tidyNode::__construct()
1888          __constructor for tidyNode. */
TIDY_NODE_METHOD(__construct)1889 static TIDY_NODE_METHOD(__construct)
1890 {
1891 	zend_throw_error(NULL, "You should not create a tidyNode manually");
1892 }
1893 /* }}} */
1894 
_php_tidy_register_nodetypes(INIT_FUNC_ARGS)1895 static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
1896 {
1897 	TIDY_NODE_CONST(ROOT, Root);
1898 	TIDY_NODE_CONST(DOCTYPE, DocType);
1899 	TIDY_NODE_CONST(COMMENT, Comment);
1900 	TIDY_NODE_CONST(PROCINS, ProcIns);
1901 	TIDY_NODE_CONST(TEXT, Text);
1902 	TIDY_NODE_CONST(START, Start);
1903 	TIDY_NODE_CONST(END, End);
1904 	TIDY_NODE_CONST(STARTEND, StartEnd);
1905 	TIDY_NODE_CONST(CDATA, CDATA);
1906 	TIDY_NODE_CONST(SECTION, Section);
1907 	TIDY_NODE_CONST(ASP, Asp);
1908 	TIDY_NODE_CONST(JSTE, Jste);
1909 	TIDY_NODE_CONST(PHP, Php);
1910 	TIDY_NODE_CONST(XMLDECL, XmlDecl);
1911 }
1912 
_php_tidy_register_tags(INIT_FUNC_ARGS)1913 static void _php_tidy_register_tags(INIT_FUNC_ARGS)
1914 {
1915 	TIDY_TAG_CONST(UNKNOWN);
1916 	TIDY_TAG_CONST(A);
1917 	TIDY_TAG_CONST(ABBR);
1918 	TIDY_TAG_CONST(ACRONYM);
1919 	TIDY_TAG_CONST(ADDRESS);
1920 	TIDY_TAG_CONST(ALIGN);
1921 	TIDY_TAG_CONST(APPLET);
1922 	TIDY_TAG_CONST(AREA);
1923 	TIDY_TAG_CONST(B);
1924 	TIDY_TAG_CONST(BASE);
1925 	TIDY_TAG_CONST(BASEFONT);
1926 	TIDY_TAG_CONST(BDO);
1927 	TIDY_TAG_CONST(BGSOUND);
1928 	TIDY_TAG_CONST(BIG);
1929 	TIDY_TAG_CONST(BLINK);
1930 	TIDY_TAG_CONST(BLOCKQUOTE);
1931 	TIDY_TAG_CONST(BODY);
1932 	TIDY_TAG_CONST(BR);
1933 	TIDY_TAG_CONST(BUTTON);
1934 	TIDY_TAG_CONST(CAPTION);
1935 	TIDY_TAG_CONST(CENTER);
1936 	TIDY_TAG_CONST(CITE);
1937 	TIDY_TAG_CONST(CODE);
1938 	TIDY_TAG_CONST(COL);
1939 	TIDY_TAG_CONST(COLGROUP);
1940 	TIDY_TAG_CONST(COMMENT);
1941 	TIDY_TAG_CONST(DD);
1942 	TIDY_TAG_CONST(DEL);
1943 	TIDY_TAG_CONST(DFN);
1944 	TIDY_TAG_CONST(DIR);
1945 	TIDY_TAG_CONST(DIV);
1946 	TIDY_TAG_CONST(DL);
1947 	TIDY_TAG_CONST(DT);
1948 	TIDY_TAG_CONST(EM);
1949 	TIDY_TAG_CONST(EMBED);
1950 	TIDY_TAG_CONST(FIELDSET);
1951 	TIDY_TAG_CONST(FONT);
1952 	TIDY_TAG_CONST(FORM);
1953 	TIDY_TAG_CONST(FRAME);
1954 	TIDY_TAG_CONST(FRAMESET);
1955 	TIDY_TAG_CONST(H1);
1956 	TIDY_TAG_CONST(H2);
1957 	TIDY_TAG_CONST(H3);
1958 	TIDY_TAG_CONST(H4);
1959 	TIDY_TAG_CONST(H5);
1960 	TIDY_TAG_CONST(H6);
1961 	TIDY_TAG_CONST(HEAD);
1962 	TIDY_TAG_CONST(HR);
1963 	TIDY_TAG_CONST(HTML);
1964 	TIDY_TAG_CONST(I);
1965 	TIDY_TAG_CONST(IFRAME);
1966 	TIDY_TAG_CONST(ILAYER);
1967 	TIDY_TAG_CONST(IMG);
1968 	TIDY_TAG_CONST(INPUT);
1969 	TIDY_TAG_CONST(INS);
1970 	TIDY_TAG_CONST(ISINDEX);
1971 	TIDY_TAG_CONST(KBD);
1972 	TIDY_TAG_CONST(KEYGEN);
1973 	TIDY_TAG_CONST(LABEL);
1974 	TIDY_TAG_CONST(LAYER);
1975 	TIDY_TAG_CONST(LEGEND);
1976 	TIDY_TAG_CONST(LI);
1977 	TIDY_TAG_CONST(LINK);
1978 	TIDY_TAG_CONST(LISTING);
1979 	TIDY_TAG_CONST(MAP);
1980 	TIDY_TAG_CONST(MARQUEE);
1981 	TIDY_TAG_CONST(MENU);
1982 	TIDY_TAG_CONST(META);
1983 	TIDY_TAG_CONST(MULTICOL);
1984 	TIDY_TAG_CONST(NOBR);
1985 	TIDY_TAG_CONST(NOEMBED);
1986 	TIDY_TAG_CONST(NOFRAMES);
1987 	TIDY_TAG_CONST(NOLAYER);
1988 	TIDY_TAG_CONST(NOSAVE);
1989 	TIDY_TAG_CONST(NOSCRIPT);
1990 	TIDY_TAG_CONST(OBJECT);
1991 	TIDY_TAG_CONST(OL);
1992 	TIDY_TAG_CONST(OPTGROUP);
1993 	TIDY_TAG_CONST(OPTION);
1994 	TIDY_TAG_CONST(P);
1995 	TIDY_TAG_CONST(PARAM);
1996 	TIDY_TAG_CONST(PLAINTEXT);
1997 	TIDY_TAG_CONST(PRE);
1998 	TIDY_TAG_CONST(Q);
1999 	TIDY_TAG_CONST(RB);
2000 	TIDY_TAG_CONST(RBC);
2001 	TIDY_TAG_CONST(RP);
2002 	TIDY_TAG_CONST(RT);
2003 	TIDY_TAG_CONST(RTC);
2004 	TIDY_TAG_CONST(RUBY);
2005 	TIDY_TAG_CONST(S);
2006 	TIDY_TAG_CONST(SAMP);
2007 	TIDY_TAG_CONST(SCRIPT);
2008 	TIDY_TAG_CONST(SELECT);
2009 	TIDY_TAG_CONST(SERVER);
2010 	TIDY_TAG_CONST(SERVLET);
2011 	TIDY_TAG_CONST(SMALL);
2012 	TIDY_TAG_CONST(SPACER);
2013 	TIDY_TAG_CONST(SPAN);
2014 	TIDY_TAG_CONST(STRIKE);
2015 	TIDY_TAG_CONST(STRONG);
2016 	TIDY_TAG_CONST(STYLE);
2017 	TIDY_TAG_CONST(SUB);
2018 	TIDY_TAG_CONST(SUP);
2019 	TIDY_TAG_CONST(TABLE);
2020 	TIDY_TAG_CONST(TBODY);
2021 	TIDY_TAG_CONST(TD);
2022 	TIDY_TAG_CONST(TEXTAREA);
2023 	TIDY_TAG_CONST(TFOOT);
2024 	TIDY_TAG_CONST(TH);
2025 	TIDY_TAG_CONST(THEAD);
2026 	TIDY_TAG_CONST(TITLE);
2027 	TIDY_TAG_CONST(TR);
2028 	TIDY_TAG_CONST(TT);
2029 	TIDY_TAG_CONST(U);
2030 	TIDY_TAG_CONST(UL);
2031 	TIDY_TAG_CONST(VAR);
2032 	TIDY_TAG_CONST(WBR);
2033 	TIDY_TAG_CONST(XMP);
2034 }
2035 
2036 #endif
2037 
2038 /*
2039  * Local variables:
2040  * tab-width: 4
2041  * c-basic-offset: 4
2042  * End:
2043  * vim600: noet sw=4 ts=4 fdm=marker
2044  * vim<600: noet sw=4 ts=4
2045  */
2046