xref: /PHP-5.6/ext/tidy/tidy.c (revision 49493a2d)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2016 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: John Coggeshall <john@php.net>                               |
16   +----------------------------------------------------------------------+
17 */
18 
19 /* $Id: 57f050b275c6da348310461a64aaad21feef8091 $ */
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "php.h"
26 #include "php_tidy.h"
27 
28 #if HAVE_TIDY
29 
30 #include "php_ini.h"
31 #include "ext/standard/info.h"
32 
33 #include "tidy.h"
34 #include "buffio.h"
35 
36 /* compatibility with older versions of libtidy */
37 #ifndef TIDY_CALL
38 #define TIDY_CALL
39 #endif
40 
41 #define PHP_TIDY_MODULE_VERSION	"2.0"
42 
43 /* {{{ ext/tidy macros
44 */
/* Overwrite the last byte of a non-empty buffer (fields: bp, size) with NUL so
 * that bp can be read as a C string; a buffer whose size is 0 is left alone. */
45 #define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)
46 
/* Declares `object` and initialises it from getThis(). Because it expands to a
 * declaration it has to appear among the declarations of the enclosing function. */
47 #define TIDY_SET_CONTEXT \
48     zval *object = getThis();
49 
/* Declares `obj` and `object`, then resolves the tidy object for either calling style:
 *  - getThis() returned an object: no parameters are accepted; on a parse
 *    failure the enclosing function returns without setting a value.
 *  - otherwise: a single argument of class tidy_ce_doc is parsed ("O"); on
 *    failure the enclosing function RETURN_FALSEs.
 * Finally `obj` is looked up in the object store. Note the hidden returns. */
50 #define TIDY_FETCH_OBJECT	\
51 	PHPTidyObj *obj;	\
52 	TIDY_SET_CONTEXT; \
53 	if (object) {	\
54 		if (zend_parse_parameters_none() == FAILURE) {	\
55 			return;	\
56 		}	\
57 	} else {	\
58 		if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, NULL, "O", &object, tidy_ce_doc) == FAILURE) {	\
59 			RETURN_FALSE;	\
60 		}	\
61 	}	\
62 	obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);	\
63 
/* Variant that always takes the object from getThis() and accepts no
 * parameters; `object` is not checked for NULL before the store lookup. */
64 #define TIDY_FETCH_ONLY_OBJECT	\
65 	PHPTidyObj *obj;	\
66 	TIDY_SET_CONTEXT; \
67 	if (zend_parse_parameters_none() == FAILURE) {	\
68 		return;	\
69 	}	\
70 	obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);	\
71 
/* Apply a user-supplied configuration (_val is a zval **) to the tidy document _doc.
 *  - array: handed to _php_tidy_apply_config_array().
 *  - anything else: converted to a string in place and treated as a config
 *    file path. The path goes through TIDY_OPEN_BASE_DIR_CHECK, which may
 *    RETURN_FALSE from the *enclosing function*, so callers must not hold
 *    resources that need releasing at this point. tidyLoadConfig() results:
 *    -1 raises an E_WARNING, 1 raises an E_NOTICE, other values are silent.
 * Not wrapped in do { } while (0): do not use as the body of an unbraced if/else. */
72 #define TIDY_APPLY_CONFIG_ZVAL(_doc, _val) \
73     if(_val) { \
74         if(Z_TYPE_PP(_val) == IS_ARRAY) { \
75             _php_tidy_apply_config_array(_doc, HASH_OF(*_val) TSRMLS_CC); \
76         } else { \
77             convert_to_string_ex(_val); \
78             TIDY_OPEN_BASE_DIR_CHECK(Z_STRVAL_PP(_val)); \
79             switch (tidyLoadConfig(_doc, Z_STRVAL_PP(_val))) { \
80               case -1: \
81                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_PP(_val)); \
82                 break; \
83               case 1: \
84                 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_PP(_val)); \
85                 break; \
86             } \
87         } \
88     }
89 
/* Register an internal class.
 *   classname - identifier stringified into the class name
 *   name      - suffix selecting tidy_funcs_<name>, tidy_object_new_<name>,
 *               tidy_ce_<name> and tidy_object_handlers_<name>
 *   parent    - parent class entry passed to zend_register_internal_class_ex()
 *   __flags   - extra bits OR-ed into ce_flags
 * The handler table starts as a copy of the standard object handlers with
 * clone_obj set to NULL. */
90 #define REGISTER_TIDY_CLASS(classname, name, parent, __flags) \
91 	{ \
92 		zend_class_entry ce; \
93 		INIT_CLASS_ENTRY(ce, # classname, tidy_funcs_ ## name); \
94 		ce.create_object = tidy_object_new_ ## name; \
95 		tidy_ce_ ## name = zend_register_internal_class_ex(&ce, parent, NULL TSRMLS_CC); \
96 		tidy_ce_ ## name->ce_flags |= __flags;  \
97 		memcpy(&tidy_object_handlers_ ## name, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); \
98 		tidy_object_handlers_ ## name.clone_obj = NULL; \
99 	}
100 
/* Register persistent, case-sensitive long constants: TIDY_TAG_<tag> takes the
 * value TidyTag_<tag>, and TIDY_NODETYPE_<name> takes the value TidyNode_<type>. */
101 #define TIDY_TAG_CONST(tag) REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)
102 #define TIDY_NODE_CONST(name, type) REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)
103 
104 #ifndef TRUE
105 #define TRUE 1
106 #endif
107 
108 #ifndef FALSE
109 #define FALSE 0
110 #endif
111 
/* ADD_PROPERTY_* helpers: allocate a fresh zval, fill it, and store it in the
 * HashTable _table under the key obtained by stringifying _key (the key length
 * passed to zend_hash_update includes the terminating NUL). Each expands to a
 * bare { } block with a local named `tmp`. */

/* String value copied from the NUL-terminated _string; a NULL _string stores "". */
112 #define ADD_PROPERTY_STRING(_table, _key, _string) \
113 	{ \
114 		zval *tmp; \
115 		MAKE_STD_ZVAL(tmp); \
116 		if (_string) { \
117 			ZVAL_STRING(tmp, (char *)_string, 1); \
118 		} else { \
119 			ZVAL_EMPTY_STRING(tmp); \
120 		} \
121 		zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tmp, sizeof(zval *), NULL); \
122 	}
123 
/* As above with an explicit length _len; a NULL _string stores "" and _len is not used. */
124 #define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
125    { \
126        zval *tmp; \
127        MAKE_STD_ZVAL(tmp); \
128        if (_string) { \
129            ZVAL_STRINGL(tmp, (char *)_string, _len, 1); \
130        } else { \
131            ZVAL_EMPTY_STRING(tmp); \
132        } \
133        zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tmp, sizeof(zval *), NULL); \
134    }
135 
/* Integer (long) value. */
136 #define ADD_PROPERTY_LONG(_table, _key, _long) \
137 	{ \
138 		zval *tmp; \
139 		MAKE_STD_ZVAL(tmp); \
140 		ZVAL_LONG(tmp, _long); \
141 		zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tmp, sizeof(zval *), NULL); \
142 	}
143 
/* NULL value. */
144 #define ADD_PROPERTY_NULL(_table, _key) \
145 	{ \
146 		zval *tmp; \
147 		MAKE_STD_ZVAL(tmp); \
148 		ZVAL_NULL(tmp); \
149 		zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tmp, sizeof(zval *), NULL); \
150 	}
151 
/* Boolean value. */
152 #define ADD_PROPERTY_BOOL(_table, _key, _bool) \
153     { \
154        zval *tmp; \
155        MAKE_STD_ZVAL(tmp); \
156        ZVAL_BOOL(tmp, _bool); \
157        zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tmp, sizeof(zval *), NULL); \
158    }
159 
/* RETURN_FALSE from the enclosing function when php_check_open_basedir()
 * reports a non-zero result for `filename`. This is a hidden return: nothing
 * the caller allocated beforehand is released. */
160 #define TIDY_OPEN_BASE_DIR_CHECK(filename) \
161 if (php_check_open_basedir(filename TSRMLS_CC)) { \
162 	RETURN_FALSE; \
163 } \
164 
/* When the tidy.default_config setting (TG(default_config)) is a non-empty
 * string, load it into _doc; a negative tidyLoadConfig() result raises an
 * E_WARNING and is otherwise ignored. */
165 #define TIDY_SET_DEFAULT_CONFIG(_doc) \
166 	if (TG(default_config) && TG(default_config)[0]) { \
167 		if (tidyLoadConfig(_doc, TG(default_config)) < 0) { \
168 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to load Tidy configuration file at '%s'.", TG(default_config)); \
169 		} \
170 	}
171 /* }}} */
172 
173 /* {{{ ext/tidy structs
174 */
175 typedef struct _PHPTidyDoc PHPTidyDoc;
176 typedef struct _PHPTidyObj PHPTidyObj;
177 
/* Which of the two object flavours a PHPTidyObj represents. */
178 typedef enum {
179 	is_node,
180 	is_doc
181 } tidy_obj_type;
182 
/* Selector used by php_tidy_create_node() to pick the root/html/head/body node. */
183 typedef enum {
184 	is_root_node,
185 	is_html_node,
186 	is_head_node,
187 	is_body_node
188 } tidy_base_nodetypes;
189 
/* Document state shared between a document object and the node objects
 * created from it; released by tidy_object_free_storage() once ref_count
 * drops to zero. */
190 struct _PHPTidyDoc {
191 	TidyDoc			doc;		/* libtidy document handle */
192 	TidyBuffer		*errbuf;	/* heap-allocated buffer installed via tidySetErrorBuffer() */
193 	unsigned int	ref_count;	/* number of PHPTidyObj instances pointing here */
194 	unsigned int    initialized:1;	/* set to 0 on creation; updated outside this view */
195 };
196 
/* Per-object storage; `std` comes first so the standard object routines can
 * operate on it. */
197 struct _PHPTidyObj {
198 	zend_object		std;
199 	TidyNode		node;		/* node wrapped by a node object */
200 	tidy_obj_type	type;
201 	PHPTidyDoc		*ptdoc;		/* shared document; NULL for a node object not yet attached */
202 };
203 /* }}} */
204 
205 /* {{{ ext/tidy prototypes
206 */
207 static char *php_tidy_file_to_mem(char *, zend_bool, int * TSRMLS_DC);
208 static void tidy_object_free_storage(void * TSRMLS_DC);
209 static zend_object_value tidy_object_new_node(zend_class_entry * TSRMLS_DC);
210 static zend_object_value tidy_object_new_doc(zend_class_entry * TSRMLS_DC);
211 static zval * tidy_instanciate(zend_class_entry *, zval * TSRMLS_DC);
212 static int tidy_doc_cast_handler(zval *, zval *, int TSRMLS_DC);
213 static int tidy_node_cast_handler(zval *, zval *, int TSRMLS_DC);
214 static void tidy_doc_update_properties(PHPTidyObj * TSRMLS_DC);
215 static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type TSRMLS_DC);
216 static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType * TSRMLS_DC);
217 static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
218 static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval * TSRMLS_DC);
219 static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options TSRMLS_DC);
220 static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
221 static void _php_tidy_register_tags(INIT_FUNC_ARGS);
222 static PHP_INI_MH(php_tidy_set_clean_output);
223 static void php_tidy_clean_output_start(const char *name, size_t name_len TSRMLS_DC);
224 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags TSRMLS_DC);
225 static int php_tidy_output_handler(void **nothing, php_output_context *output_context);
226 
227 static PHP_MINIT_FUNCTION(tidy);
228 static PHP_MSHUTDOWN_FUNCTION(tidy);
229 static PHP_RINIT_FUNCTION(tidy);
230 static PHP_MINFO_FUNCTION(tidy);
231 
232 static PHP_FUNCTION(tidy_getopt);
233 static PHP_FUNCTION(tidy_parse_string);
234 static PHP_FUNCTION(tidy_parse_file);
235 static PHP_FUNCTION(tidy_clean_repair);
236 static PHP_FUNCTION(tidy_repair_string);
237 static PHP_FUNCTION(tidy_repair_file);
238 static PHP_FUNCTION(tidy_diagnose);
239 static PHP_FUNCTION(tidy_get_output);
240 static PHP_FUNCTION(tidy_get_error_buffer);
241 static PHP_FUNCTION(tidy_get_release);
242 static PHP_FUNCTION(tidy_get_config);
243 static PHP_FUNCTION(tidy_get_status);
244 static PHP_FUNCTION(tidy_get_html_ver);
245 #if HAVE_TIDYOPTGETDOC
246 static PHP_FUNCTION(tidy_get_opt_doc);
247 #endif
248 static PHP_FUNCTION(tidy_is_xhtml);
249 static PHP_FUNCTION(tidy_is_xml);
250 static PHP_FUNCTION(tidy_error_count);
251 static PHP_FUNCTION(tidy_warning_count);
252 static PHP_FUNCTION(tidy_access_count);
253 static PHP_FUNCTION(tidy_config_count);
254 
255 static PHP_FUNCTION(tidy_get_root);
256 static PHP_FUNCTION(tidy_get_html);
257 static PHP_FUNCTION(tidy_get_head);
258 static PHP_FUNCTION(tidy_get_body);
259 
260 static TIDY_DOC_METHOD(__construct);
261 static TIDY_DOC_METHOD(parseFile);
262 static TIDY_DOC_METHOD(parseString);
263 
264 static TIDY_NODE_METHOD(hasChildren);
265 static TIDY_NODE_METHOD(hasSiblings);
266 static TIDY_NODE_METHOD(isComment);
267 static TIDY_NODE_METHOD(isHtml);
268 static TIDY_NODE_METHOD(isText);
269 static TIDY_NODE_METHOD(isJste);
270 static TIDY_NODE_METHOD(isAsp);
271 static TIDY_NODE_METHOD(isPhp);
272 static TIDY_NODE_METHOD(getParent);
273 static TIDY_NODE_METHOD(__construct);
274 /* }}} */
275 
276 ZEND_DECLARE_MODULE_GLOBALS(tidy)
277 
/* INI settings:
 *   tidy.default_config - PHP_INI_SYSTEM, default "", stored via OnUpdateString
 *   tidy.clean_output   - PHP_INI_USER, default "0", handled by php_tidy_set_clean_output */
278 PHP_INI_BEGIN()
279 STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
280 STD_PHP_INI_ENTRY("tidy.clean_output",		"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
281 PHP_INI_END()
282 
283 /* {{{ arginfo */
284 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_string, 0, 0, 1)
285 	ZEND_ARG_INFO(0, input)
286 	ZEND_ARG_INFO(0, config_options)
287 	ZEND_ARG_INFO(0, encoding)
288 ZEND_END_ARG_INFO()
289 
290 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_error_buffer, 0)
291 ZEND_END_ARG_INFO()
292 
293 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_output, 0)
294 ZEND_END_ARG_INFO()
295 
296 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_file, 0, 0, 1)
297 	ZEND_ARG_INFO(0, file)
298 	ZEND_ARG_INFO(0, config_options)
299 	ZEND_ARG_INFO(0, encoding)
300 	ZEND_ARG_INFO(0, use_include_path)
301 ZEND_END_ARG_INFO()
302 
303 ZEND_BEGIN_ARG_INFO(arginfo_tidy_clean_repair, 0)
304 ZEND_END_ARG_INFO()
305 
306 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_string, 0, 0, 1)
307 	ZEND_ARG_INFO(0, data)
308 	ZEND_ARG_INFO(0, config_file)
309 	ZEND_ARG_INFO(0, encoding)
310 ZEND_END_ARG_INFO()
311 
312 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_file, 0, 0, 1)
313 	ZEND_ARG_INFO(0, filename)
314 	ZEND_ARG_INFO(0, config_file)
315 	ZEND_ARG_INFO(0, encoding)
316 	ZEND_ARG_INFO(0, use_include_path)
317 ZEND_END_ARG_INFO()
318 
319 ZEND_BEGIN_ARG_INFO(arginfo_tidy_diagnose, 0)
320 ZEND_END_ARG_INFO()
321 
322 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_release, 0)
323 ZEND_END_ARG_INFO()
324 
325 #if HAVE_TIDYOPTGETDOC
326 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc, 0, 0, 2)
327 	ZEND_ARG_INFO(0, resource)
328 	ZEND_ARG_INFO(0, optname)
329 ZEND_END_ARG_INFO()
330 #endif
331 
332 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_config, 0)
333 ZEND_END_ARG_INFO()
334 
335 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_status, 0)
336 ZEND_END_ARG_INFO()
337 
338 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_html_ver, 0)
339 ZEND_END_ARG_INFO()
340 
341 ZEND_BEGIN_ARG_INFO(arginfo_tidy_is_xhtml, 0)
342 ZEND_END_ARG_INFO()
343 
344 ZEND_BEGIN_ARG_INFO(arginfo_tidy_is_xml, 0)
345 ZEND_END_ARG_INFO()
346 
347 ZEND_BEGIN_ARG_INFO(arginfo_tidy_error_count, 0)
348 ZEND_END_ARG_INFO()
349 
350 ZEND_BEGIN_ARG_INFO(arginfo_tidy_warning_count, 0)
351 ZEND_END_ARG_INFO()
352 
353 ZEND_BEGIN_ARG_INFO(arginfo_tidy_access_count, 0)
354 ZEND_END_ARG_INFO()
355 
356 ZEND_BEGIN_ARG_INFO(arginfo_tidy_config_count, 0)
357 ZEND_END_ARG_INFO()
358 
359 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt, 0, 0, 1)
360 	ZEND_ARG_INFO(0, option)
361 ZEND_END_ARG_INFO()
362 
363 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_root, 0)
364 ZEND_END_ARG_INFO()
365 
366 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_html, 0)
367 ZEND_END_ARG_INFO()
368 
369 ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_head, 0)
370 ZEND_END_ARG_INFO()
371 
372 ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_body, 0, 0, 1)
373 	ZEND_ARG_INFO(0, tidy)
374 ZEND_END_ARG_INFO()
375 /* }}} */
376 
/* Procedural function table referenced by tidy_module_entry; each entry pairs
 * a PHP_FUNCTION with its arginfo. tidy_get_opt_doc is only present when
 * HAVE_TIDYOPTGETDOC is set. */
377 static const zend_function_entry tidy_functions[] = {
378 	PHP_FE(tidy_getopt,             arginfo_tidy_getopt)
379 	PHP_FE(tidy_parse_string,       arginfo_tidy_parse_string)
380 	PHP_FE(tidy_parse_file,         arginfo_tidy_parse_file)
381 	PHP_FE(tidy_get_output,         arginfo_tidy_get_output)
382 	PHP_FE(tidy_get_error_buffer,   arginfo_tidy_get_error_buffer)
383 	PHP_FE(tidy_clean_repair,       arginfo_tidy_clean_repair)
384 	PHP_FE(tidy_repair_string,	arginfo_tidy_repair_string)
385 	PHP_FE(tidy_repair_file,	arginfo_tidy_repair_file)
386 	PHP_FE(tidy_diagnose,           arginfo_tidy_diagnose)
387 	PHP_FE(tidy_get_release,	arginfo_tidy_get_release)
388 	PHP_FE(tidy_get_config,		arginfo_tidy_get_config)
389 	PHP_FE(tidy_get_status,		arginfo_tidy_get_status)
390 	PHP_FE(tidy_get_html_ver,	arginfo_tidy_get_html_ver)
391 	PHP_FE(tidy_is_xhtml,		arginfo_tidy_is_xhtml)
392 	PHP_FE(tidy_is_xml,		arginfo_tidy_is_xml)
393 	PHP_FE(tidy_error_count,	arginfo_tidy_error_count)
394 	PHP_FE(tidy_warning_count,	arginfo_tidy_warning_count)
395 	PHP_FE(tidy_access_count,	arginfo_tidy_access_count)
396 	PHP_FE(tidy_config_count,	arginfo_tidy_config_count)
397 #if HAVE_TIDYOPTGETDOC
398 	PHP_FE(tidy_get_opt_doc,	arginfo_tidy_get_opt_doc)
399 #endif
400 	PHP_FE(tidy_get_root,		arginfo_tidy_get_root)
401 	PHP_FE(tidy_get_head,		arginfo_tidy_get_head)
402 	PHP_FE(tidy_get_html,		arginfo_tidy_get_html)
403 	PHP_FE(tidy_get_body,		arginfo_tidy_get_body)
404 	PHP_FE_END
405 };
406 
/* Method table selected by REGISTER_TIDY_CLASS(..., doc, ...). All entries
 * pass NULL arginfo. NOTE(review): TIDY_METHOD_MAP is defined outside this
 * file view; it presumably exposes the named tidy_* function as a method —
 * confirm in php_tidy.h. */
407 static const zend_function_entry tidy_funcs_doc[] = {
408 	TIDY_METHOD_MAP(getOpt, tidy_getopt, NULL)
409 	TIDY_METHOD_MAP(cleanRepair, tidy_clean_repair, NULL)
410 	TIDY_DOC_ME(parseFile, NULL)
411 	TIDY_DOC_ME(parseString, NULL)
412 	TIDY_METHOD_MAP(repairString, tidy_repair_string, NULL)
413 	TIDY_METHOD_MAP(repairFile, tidy_repair_file, NULL)
414 	TIDY_METHOD_MAP(diagnose, tidy_diagnose, NULL)
415 	TIDY_METHOD_MAP(getRelease, tidy_get_release, NULL)
416 	TIDY_METHOD_MAP(getConfig, tidy_get_config, NULL)
417 	TIDY_METHOD_MAP(getStatus, tidy_get_status, NULL)
418 	TIDY_METHOD_MAP(getHtmlVer, tidy_get_html_ver, NULL)
419 #if HAVE_TIDYOPTGETDOC
420 	TIDY_METHOD_MAP(getOptDoc, tidy_get_opt_doc, NULL)
421 #endif
422 	TIDY_METHOD_MAP(isXhtml, tidy_is_xhtml, NULL)
423 	TIDY_METHOD_MAP(isXml, tidy_is_xml, NULL)
424 	TIDY_METHOD_MAP(root, tidy_get_root, NULL)
425 	TIDY_METHOD_MAP(head, tidy_get_head, NULL)
426 	TIDY_METHOD_MAP(html, tidy_get_html, NULL)
427 	TIDY_METHOD_MAP(body, tidy_get_body, NULL)
428 	TIDY_DOC_ME(__construct, NULL)
429 	PHP_FE_END
430 };
431 
/* Method table selected by REGISTER_TIDY_CLASS(..., node, ...). The
 * constructor is registered through TIDY_NODE_PRIVATE_ME (defined outside
 * this view; presumably marks it private — confirm). */
432 static const zend_function_entry tidy_funcs_node[] = {
433 	TIDY_NODE_ME(hasChildren, NULL)
434 	TIDY_NODE_ME(hasSiblings, NULL)
435 	TIDY_NODE_ME(isComment, NULL)
436 	TIDY_NODE_ME(isHtml, NULL)
437 	TIDY_NODE_ME(isText, NULL)
438 	TIDY_NODE_ME(isJste, NULL)
439 	TIDY_NODE_ME(isAsp, NULL)
440 	TIDY_NODE_ME(isPhp, NULL)
441 	TIDY_NODE_ME(getParent, NULL)
442 	TIDY_NODE_PRIVATE_ME(__construct, NULL)
443 	PHP_FE_END
444 };
445 
/* Class entries and handler tables written by REGISTER_TIDY_CLASS; the handler
 * tables are what tidy_object_new_doc()/tidy_object_new_node() attach to new
 * objects. */
446 static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
447 
448 static zend_object_handlers tidy_object_handlers_doc;
449 static zend_object_handlers tidy_object_handlers_node;
450 
/* Module descriptor: wires in the function table, the MINIT/MSHUTDOWN/RINIT/
 * MINFO hooks, the module version and the module globals. The slot after
 * RINIT (request shutdown) and the three slots after the globals are NULL. */
451 zend_module_entry tidy_module_entry = {
452 	STANDARD_MODULE_HEADER,
453 	"tidy",
454 	tidy_functions,
455 	PHP_MINIT(tidy),
456 	PHP_MSHUTDOWN(tidy),
457 	PHP_RINIT(tidy),
458 	NULL,
459 	PHP_MINFO(tidy),
460 	PHP_TIDY_MODULE_VERSION,
461 	PHP_MODULE_GLOBALS(tidy),
462 	NULL,
463 	NULL,
464 	NULL,
465 	STANDARD_MODULE_PROPERTIES_EX
466 };
467 
/* Emit the get_module() entry point only when tidy is built as a loadable
 * (shared) extension. The macro must be expanded exactly once. */
#ifdef COMPILE_DL_TIDY
ZEND_GET_MODULE(tidy)
#endif
471 
472 static void* TIDY_CALL php_tidy_malloc(size_t len)
473 {
474 	return emalloc(len);
475 }
476 
php_tidy_realloc(void * buf,size_t len)477 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
478 {
479 	return erealloc(buf, len);
480 }
481 
php_tidy_free(void * buf)482 static void TIDY_CALL php_tidy_free(void *buf)
483 {
484 	efree(buf);
485 }
486 
php_tidy_panic(ctmbstr msg)487 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
488 {
489 	TSRMLS_FETCH();
490 	php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
491 }
492 
/*
 * Set one configuration option on a tidy document.
 *
 *   doc     - document whose option is changed
 *   optname - option name, resolved with tidyGetOptionByName()
 *   value   - new value; it is never modified. When its type does not match
 *             the option type, a private copy is converted instead.
 *
 * Returns SUCCESS when libtidy accepted the value. Returns FAILURE for an
 * unknown option (E_NOTICE), a read-only option (E_NOTICE), an option of an
 * unrecognised type (E_WARNING), or when the libtidy setter rejects the value.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value TSRMLS_DC)
{
	TidyOption option = tidyGetOptionByName(doc, optname);
	zval scratch = *value;	/* shallow copy; deep-copied only right before a conversion */
	int status = FAILURE;

	if (!option) {
		php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Attempting to set read-only option '%s'", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString:
			if (Z_TYPE(scratch) != IS_STRING) {
				zval_copy_ctor(&scratch);
				convert_to_string(&scratch);
			}
			if (tidyOptSetValue(doc, tidyOptGetId(option), Z_STRVAL(scratch))) {
				status = SUCCESS;
			}
			/* A type change means scratch owns a converted string: release it. */
			if (Z_TYPE(scratch) != Z_TYPE_P(value)) {
				zval_dtor(&scratch);
			}
			return status;

		case TidyInteger:
			if (Z_TYPE(scratch) != IS_LONG) {
				zval_copy_ctor(&scratch);
				convert_to_long(&scratch);
			}
			return tidyOptSetInt(doc, tidyOptGetId(option), Z_LVAL(scratch)) ? SUCCESS : FAILURE;

		case TidyBoolean:
			if (Z_TYPE(scratch) != IS_LONG) {
				zval_copy_ctor(&scratch);
				convert_to_long(&scratch);
			}
			return tidyOptSetBool(doc, tidyOptGetId(option), Z_LVAL(scratch)) ? SUCCESS : FAILURE;

		default:
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to determine type of configuration option");
			return FAILURE;
	}
}
552 
/*
 * One-shot "parse, clean and repair" helper working on a throw-away tidy
 * document (presumably the backend of tidy_repair_string()/tidy_repair_file()).
 *
 *   is_file - non-zero: the first argument is a path whose contents are read
 *             with php_tidy_file_to_mem(); zero: the first argument is the
 *             markup itself.
 *
 * PHP arguments: input, optional config (option array or config file path),
 * optional encoding name, optional use_include_path flag.
 *
 * Sets the return value to the repaired markup, or FALSE when argument
 * parsing, reading the file, the open_basedir check on the config file,
 * parsing, or the repair step fails.
 *
 * Every exit after the document has been created goes through the `cleanup`
 * label so the document, the error buffer and the file contents are released.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *data=NULL, *arg1, *enc = NULL;
	int arg1_len, enc_len = 0, data_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zval **config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "p|Zsb", &arg1, &arg1_len, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		if (!(data = php_tidy_file_to_mem(arg1, use_include_path, &data_len TSRMLS_CC))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|Zsb", &arg1, &arg1_len, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		data = arg1;
		data_len = arg1_len;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		if (is_file) {
			efree(data);
		}
		php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not set Tidy error buffer");
		/* Everything above has been released; never fall through to code that uses it. */
		return;
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/*
	 * TIDY_APPLY_CONFIG_ZVAL performs the open_basedir check itself, but bails
	 * out with a bare RETURN_FALSE, which would skip the cleanup below. Run
	 * the same check first so a rejected path leaves through `cleanup`; the
	 * check inside the macro then always passes.
	 */
	if (config && Z_TYPE_PP(config) != IS_ARRAY) {
		convert_to_string_ex(config);
		if (php_check_open_basedir(Z_STRVAL_PP(config) TSRMLS_CC)) {
			RETVAL_FALSE;
			goto cleanup;
		}
	}

	if (config) {
		TIDY_APPLY_CONFIG_ZVAL(doc, config);
	}

	if(enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not set encoding '%s'", enc);
			/* Processing continues; the parse step below sets the final return value. */
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		tidyBufInit(&buf);
		/* Attach (not copy): `buf` borrows `data`, so it is not freed separately. */
		tidyBufAttach(&buf, (byte *) data, data_len);

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0, 1);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

cleanup:
	if (is_file) {
		efree(data);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
636 
/*
 * Read a whole file into an emalloc'ed buffer.
 *
 *   filename         - path or stream URL, opened in "rb" mode
 *   use_include_path - non-zero to open with USE_PATH
 *   len              - receives the number of bytes read (set on success only)
 *
 * Returns the buffer, which the caller must efree(). An empty file yields an
 * allocated empty string with *len == 0. Returns NULL when the stream cannot
 * be opened, or when the contents do not fit in an int (E_WARNING); the byte
 * count is checked before it is narrowed to the int that callers work with.
 */
static char *php_tidy_file_to_mem(char *filename, zend_bool use_include_path, int *len TSRMLS_DC)
{
	php_stream *stream;
	char *data = NULL;
	size_t copied;

	if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
		return NULL;
	}

	copied = php_stream_copy_to_mem(stream, (void*) &data, PHP_STREAM_COPY_ALL, 0);
	php_stream_close(stream);

	if (copied == 0) {
		/* Nothing was read: hand back an owned empty string instead of NULL. */
		data = estrdup("");
	} else if (copied > (size_t) INT_MAX) {
		efree(data);
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Input string is too long");
		return NULL;
	}

	*len = (int) copied;

	return data;
}
653 
/*
 * Free-storage callback for tidy objects.
 *
 * Destroys the standard object part, drops this object's reference on the
 * shared PHPTidyDoc (if any) and, when that was the last reference, frees the
 * error buffer, releases the tidy document and frees the shared structure.
 * Finally frees the object storage itself.
 */
static void tidy_object_free_storage(void *object TSRMLS_DC)
{
	PHPTidyObj *self = (PHPTidyObj *)object;
	PHPTidyDoc *shared;

	zend_object_std_dtor(&self->std TSRMLS_CC);

	shared = self->ptdoc;
	if (shared != NULL) {
		shared->ref_count--;

		/* ref_count is unsigned, so "no references left" is exactly zero */
		if (shared->ref_count == 0) {
			tidyBufFree(shared->errbuf);
			efree(shared->errbuf);
			tidyRelease(shared->doc);
			efree(shared);
		}
	}

	efree(object);
}
673 
/*
 * Common constructor behind tidy_object_new_node()/tidy_object_new_doc().
 *
 *   class_type - class being instantiated
 *   handlers   - handler table stored in the resulting object value
 *   retval     - receives the object handle and handlers
 *   objtype    - is_doc additionally creates the shared PHPTidyDoc (tidy
 *                document plus error buffer), applies the fixed and default
 *                configuration and adds the default properties; is_node
 *                leaves ptdoc NULL.
 *
 * Failing to install the error buffer raises E_ERROR after releasing what was
 * allocated.
 */
static void tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers,
							zend_object_value *retval, tidy_obj_type objtype TSRMLS_DC)
{
	PHPTidyObj *self;

	self = ecalloc(1, sizeof(PHPTidyObj));
	zend_object_std_init(&self->std, class_type TSRMLS_CC);
	object_properties_init(&self->std, class_type);

	if (objtype == is_doc) {
		PHPTidyDoc *shared = emalloc(sizeof(PHPTidyDoc));

		self->ptdoc = shared;
		shared->doc = tidyCreate();
		shared->ref_count = 1;
		shared->initialized = 0;
		shared->errbuf = emalloc(sizeof(TidyBuffer));
		tidyBufInit(shared->errbuf);

		if (tidySetErrorBuffer(shared->doc, shared->errbuf) != 0) {
			tidyBufFree(shared->errbuf);
			efree(shared->errbuf);
			tidyRelease(shared->doc);
			efree(shared);
			efree(self);
			php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not set Tidy error buffer");
		}

		tidyOptSetBool(self->ptdoc->doc, TidyForceOutput, yes);
		tidyOptSetBool(self->ptdoc->doc, TidyMark, no);

		TIDY_SET_DEFAULT_CONFIG(self->ptdoc->doc);

		tidy_add_default_properties(self, is_doc TSRMLS_CC);
	}

	retval->handle = zend_objects_store_put(
		self,
		(zend_objects_store_dtor_t)zend_objects_destroy_object,
		(zend_objects_free_object_storage_t) tidy_object_free_storage,
		NULL TSRMLS_CC);
	retval->handlers = handlers;
}
717 
tidy_object_new_node(zend_class_entry * class_type TSRMLS_DC)718 static zend_object_value tidy_object_new_node(zend_class_entry *class_type TSRMLS_DC)
719 {
720 	zend_object_value retval;
721 	tidy_object_new(class_type, &tidy_object_handlers_node, &retval, is_node TSRMLS_CC);
722 	return retval;
723 }
724 
tidy_object_new_doc(zend_class_entry * class_type TSRMLS_DC)725 static zend_object_value tidy_object_new_doc(zend_class_entry *class_type TSRMLS_DC)
726 {
727 	zend_object_value retval;
728 	tidy_object_new(class_type, &tidy_object_handlers_doc, &retval, is_doc TSRMLS_CC);
729 	return retval;
730 }
731 
/*
 * Turn a zval into a fresh instance of class `pce`.
 *
 *   object - zval to initialise, or NULL to have one allocated here
 *
 * The resulting zval has a reference count of 1 and is flagged as a
 * reference. Returns the zval that was initialised.
 */
static zval * tidy_instanciate(zend_class_entry *pce, zval *object TSRMLS_DC)
{
	zval *instance = object;

	if (instance == NULL) {
		ALLOC_ZVAL(instance);
	}

	Z_TYPE_P(instance) = IS_OBJECT;
	object_init_ex(instance, pce);
	Z_SET_REFCOUNT_P(instance, 1);
	Z_SET_ISREF_P(instance);

	return instance;
}
744 
/*
 * Cast handler for document objects.
 *
 *   IS_LONG   -> 0
 *   IS_DOUBLE -> 0
 *   IS_BOOL   -> true
 *   IS_STRING -> the document as saved by tidySaveBuffer(), minus the last
 *                byte of the buffer (empty string for an empty buffer)
 *
 * Returns SUCCESS for the types above and FAILURE for anything else.
 */
static int tidy_doc_cast_handler(zval *in, zval *out, int type TSRMLS_DC)
{
	if (type == IS_LONG) {
		ZVAL_LONG(out, 0);
	} else if (type == IS_DOUBLE) {
		ZVAL_DOUBLE(out, 0);
	} else if (type == IS_BOOL) {
		ZVAL_BOOL(out, TRUE);
	} else if (type == IS_STRING) {
		TidyBuffer rendered;
		PHPTidyObj *self = (PHPTidyObj *)zend_object_store_get_object(in TSRMLS_CC);

		tidyBufInit(&rendered);
		tidySaveBuffer (self->ptdoc->doc, &rendered);
		ZVAL_STRINGL(out, (char *) rendered.bp, rendered.size ? rendered.size-1 : 0, 1);
		tidyBufFree(&rendered);
	} else {
		return FAILURE;
	}

	return SUCCESS;
}
777 
/*
 * Cast handler for node objects.
 *
 *   IS_LONG   -> 0
 *   IS_DOUBLE -> 0
 *   IS_BOOL   -> true
 *   IS_STRING -> the node text from tidyNodeGetText(), minus the last byte of
 *                the buffer; an empty string when the node has no document
 *                attached or when tidy produced no text at all
 *
 * Returns SUCCESS for the types above and FAILURE for anything else.
 */
static int tidy_node_cast_handler(zval *in, zval *out, int type TSRMLS_DC)
{
	TidyBuffer buf;
	PHPTidyObj *obj;

	switch(type) {
		case IS_LONG:
			ZVAL_LONG(out, 0);
			break;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			break;

		case IS_BOOL:
			ZVAL_BOOL(out, TRUE);
			break;

		case IS_STRING:
			obj = (PHPTidyObj *)zend_object_store_get_object(in TSRMLS_CC);
			tidyBufInit(&buf);
			if (obj->ptdoc) {
				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			}
			/*
			 * buf.size is unsigned: "size - 1" on an empty buffer would wrap
			 * to a huge length, so only subtract when there is data.
			 */
			if (buf.size) {
				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1, 1);
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&buf);
			break;

		default:
			return FAILURE;
	}

	return SUCCESS;
}
814 
/*
 * Refresh the "value" and "errorBuffer" properties of a document object.
 *
 * "value" is set from tidySaveBuffer() output and "errorBuffer" from the
 * document's error buffer; in both cases the last byte of the buffer is
 * dropped. A property is only written when its buffer is non-empty, so an
 * empty buffer leaves the previous property value in place.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj TSRMLS_DC)
{
	TidyBuffer rendered;
	TidyBuffer *errors;
	zval *prop;

	tidyBufInit(&rendered);
	tidySaveBuffer (obj->ptdoc->doc, &rendered);

	if (rendered.size != 0) {
		if (obj->std.properties == NULL) {
			rebuild_object_properties(&obj->std);
		}
		MAKE_STD_ZVAL(prop);
		ZVAL_STRINGL(prop, (char*)rendered.bp, rendered.size-1, TRUE);
		zend_hash_update(obj->std.properties, "value", sizeof("value"), (void *)&prop, sizeof(zval *), NULL);
	}

	tidyBufFree(&rendered);

	errors = obj->ptdoc->errbuf;
	if (errors->size != 0) {
		if (obj->std.properties == NULL) {
			rebuild_object_properties(&obj->std);
		}
		MAKE_STD_ZVAL(prop);
		ZVAL_STRINGL(prop, (char*)errors->bp, errors->size-1, TRUE);
		zend_hash_update(obj->std.properties, "errorBuffer", sizeof("errorBuffer"), (void *)&prop, sizeof(zval *), NULL);
	}
}
844 
/*
 * Populate the property table of a freshly created tidy object.
 *
 * is_doc:  "errorBuffer" and "value", both NULL.
 * is_node: "value" (node text), "name", "type", "line", "column",
 *          "proprietary", "id" (skipped for root, doctype, text and comment
 *          nodes), "attribute" (name => value array, or NULL when the node
 *          has no attributes) and "child" (array of node objects, or NULL
 *          when there are no children).
 *
 * Child objects are built recursively; each one shares obj->ptdoc and takes a
 * reference on it. Any other `type` value leaves the object untouched.
 */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type TSRMLS_DC)
{
	TidyBuffer text;
	TidyAttr attr;
	TidyNode kid;
	zval *attr_zv, *kids_zv, *kid_zv;
	PHPTidyObj *kid_obj;

	if (type == is_doc) {
		if (!obj->std.properties) {
			rebuild_object_properties(&obj->std);
		}
		ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
		ADD_PROPERTY_NULL(obj->std.properties, value);
		return;
	}

	if (type != is_node) {
		return;
	}

	if (!obj->std.properties) {
		rebuild_object_properties(&obj->std);
	}

	/* scalar properties */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
	ADD_PROPERTY_STRINGL(obj->std.properties, value, text.bp, text.size ? text.size-1 : 0);
	tidyBufFree(&text);

	ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
	ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			/* these node kinds get no "id" property */
			break;

		default:
			ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
	}

	/* "attribute": array of name => value, or NULL */
	attr = tidyAttrFirst(obj->node);
	MAKE_STD_ZVAL(attr_zv);

	if (attr) {
		array_init(attr_zv);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *)tidyAttrName(attr);
			char *attr_val = (char *)tidyAttrValue(attr);

			/* attributes lacking a name or a value are skipped */
			if (attr_name && attr_val) {
				add_assoc_string(attr_zv, attr_name, attr_val, TRUE);
			}
		}
	} else {
		ZVAL_NULL(attr_zv);
	}
	zend_hash_update(obj->std.properties, "attribute", sizeof("attribute"), (void *)&attr_zv, sizeof(zval *), NULL);

	/* "child": array of node objects, or NULL */
	kid = tidyGetChild(obj->node);
	MAKE_STD_ZVAL(kids_zv);

	if (kid) {
		array_init(kids_zv);

		for (; kid; kid = tidyGetNext(kid)) {
			MAKE_STD_ZVAL(kid_zv);
			tidy_instanciate(tidy_ce_node, kid_zv TSRMLS_CC);

			kid_obj = (PHPTidyObj *) zend_object_store_get_object(kid_zv TSRMLS_CC);
			kid_obj->node = kid;
			kid_obj->type = is_node;
			kid_obj->ptdoc = obj->ptdoc;
			kid_obj->ptdoc->ref_count++;

			tidy_add_default_properties(kid_obj, is_node TSRMLS_CC);
			add_next_index_zval(kids_zv, kid_zv);
		}
	} else {
		ZVAL_NULL(kids_zv);
	}

	zend_hash_update(obj->std.properties, "child", sizeof("child"), (void *)&kids_zv, sizeof(zval *), NULL);
}
937 
/*
 * Fetch the current value of a tidy option.
 *
 *   ptdoc - document to query
 *   opt   - option to read
 *   type  - receives the option type
 *
 * The return value is interpreted according to *type:
 *   TidyString  - newly allocated copy of the value ("" when tidy reports
 *                 none); the caller owns it
 *   TidyInteger - the integer value cast to a pointer
 *   TidyBoolean - the boolean value cast to a pointer
 * Any other option type yields NULL.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type TSRMLS_DC)
{
	*type = tidyOptGetType(opt);

	if (*type == TidyString) {
		char *current = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));

		return (void *) estrdup(current ? current : "");
	}

	if (*type == TidyInteger) {
		return (void *) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
	}

	if (*type == TidyBoolean) {
		return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
965 
/*
 * Build the node object for one of the well-known nodes of a document.
 *
 *   node_type - which node to fetch: root, html, head or body
 *
 * The document object is resolved by TIDY_FETCH_OBJECT, which may return
 * early on bad arguments. The return value is NULL when node_type is
 * unrecognised or the document has no such node; otherwise it is a node
 * object that shares the document's PHPTidyDoc (taking a reference) and
 * carries the default node properties.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *wrapper;
	TidyNode found;
	TIDY_FETCH_OBJECT;

	if (node_type == is_root_node) {
		found = tidyGetRoot(obj->ptdoc->doc);
	} else if (node_type == is_html_node) {
		found = tidyGetHtml(obj->ptdoc->doc);
	} else if (node_type == is_head_node) {
		found = tidyGetHead(obj->ptdoc->doc);
	} else if (node_type == is_body_node) {
		found = tidyGetBody(obj->ptdoc->doc);
	} else {
		RETURN_NULL();
	}

	if (!found) {
		RETURN_NULL();
	}

	tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);

	wrapper = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
	wrapper->type  = is_node;
	wrapper->ptdoc = obj->ptdoc;
	wrapper->node  = found;
	wrapper->ptdoc->ref_count++;

	tidy_add_default_properties(wrapper, is_node TSRMLS_CC);
}
1007 
/* Apply every string-keyed entry of ht_options to the tidy document as an
 * option via _php_tidy_set_tidy_opt(). Entries with numeric keys are ignored.
 *
 * The table is walked with a private HashPosition rather than the table's
 * internal pointer, so the iteration does not disturb the array position of
 * the options array the caller passed in.
 *
 * Returns SUCCESS after the whole table was processed, FAILURE (after raising
 * E_ERROR) if a key could not be retrieved. */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options TSRMLS_DC)
{
	char *opt_name;
	zval **opt_val;
	ulong opt_indx;
	uint opt_name_len;
	HashPosition pos;

	for (zend_hash_internal_pointer_reset_ex(ht_options, &pos);
		 zend_hash_get_current_data_ex(ht_options, (void *) &opt_val, &pos) == SUCCESS;
		 zend_hash_move_forward_ex(ht_options, &pos)) {

		/* the key is not duplicated, so opt_name must not be freed here */
		switch (zend_hash_get_current_key_ex(ht_options, &opt_name, &opt_name_len, &opt_indx, FALSE, &pos)) {
			case HASH_KEY_IS_STRING:
				break;

			case HASH_KEY_IS_LONG:
				continue; /* ignore numeric keys */

			default:
				php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not retrieve key from option array");
				return FAILURE;
		}

		_php_tidy_set_tidy_opt(doc, opt_name, *opt_val TSRMLS_CC);
	}

	return SUCCESS;
}
1041 
/* Parse len bytes of markup from string into the document owned by obj.
 *
 * When enc is non-NULL it is first set as the document's character encoding;
 * a rejected encoding raises a warning and aborts. The document is flagged as
 * initialized before parsing starts. On a libtidy parse error the contents of
 * the document's error buffer are raised as a warning.
 *
 * Returns SUCCESS after the object's properties were refreshed, FAILURE
 * otherwise. */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, int len, char *enc TSRMLS_DC)
{
	TidyBuffer input;

	if (enc && tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not set encoding '%s'", enc);
		return FAILURE;
	}

	obj->ptdoc->initialized = 1;

	/* wrap the caller's memory; the buffer does not take a copy */
	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(obj->ptdoc->doc, &input) >= 0) {
		tidy_doc_update_properties(obj TSRMLS_CC);
		return SUCCESS;
	}

	php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
	return FAILURE;
}
1065 
PHP_MINIT_FUNCTION(tidy)1066 static PHP_MINIT_FUNCTION(tidy)
1067 {
1068 	tidySetMallocCall(php_tidy_malloc);
1069 	tidySetReallocCall(php_tidy_realloc);
1070 	tidySetFreeCall(php_tidy_free);
1071 	tidySetPanicCall(php_tidy_panic);
1072 
1073 	REGISTER_INI_ENTRIES();
1074 	REGISTER_TIDY_CLASS(tidy, doc,	NULL, 0);
1075 	REGISTER_TIDY_CLASS(tidyNode, node,	NULL, ZEND_ACC_FINAL_CLASS);
1076 
1077 	tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
1078 	tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
1079 
1080 	_php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU);
1081 	_php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU);
1082 
1083 	php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init TSRMLS_CC);
1084 
1085 	return SUCCESS;
1086 }
1087 
PHP_RINIT_FUNCTION(tidy)1088 static PHP_RINIT_FUNCTION(tidy)
1089 {
1090 	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler") TSRMLS_CC);
1091 
1092 	return SUCCESS;
1093 }
1094 
PHP_MSHUTDOWN_FUNCTION(tidy)1095 static PHP_MSHUTDOWN_FUNCTION(tidy)
1096 {
1097 	UNREGISTER_INI_ENTRIES();
1098 	return SUCCESS;
1099 }
1100 
PHP_MINFO_FUNCTION(tidy)1101 static PHP_MINFO_FUNCTION(tidy)
1102 {
1103 	php_info_print_table_start();
1104 	php_info_print_table_header(2, "Tidy support", "enabled");
1105 	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
1106 	php_info_print_table_row(2, "Extension Version", PHP_TIDY_MODULE_VERSION " ($Id: 57f050b275c6da348310461a64aaad21feef8091 $)");
1107 	php_info_print_table_end();
1108 
1109 	DISPLAY_INI_ENTRIES();
1110 }
1111 
PHP_INI_MH(php_tidy_set_clean_output)1112 static PHP_INI_MH(php_tidy_set_clean_output)
1113 {
1114 	int status;
1115 	zend_bool value;
1116 
1117 	if (new_value_length==2 && strcasecmp("on", new_value)==0) {
1118 		value = (zend_bool) 1;
1119 	} else if (new_value_length==3 && strcasecmp("yes", new_value)==0) {
1120 		value = (zend_bool) 1;
1121 	} else if (new_value_length==4 && strcasecmp("true", new_value)==0) {
1122 		value = (zend_bool) 1;
1123 	} else {
1124 		value = (zend_bool) atoi(new_value);
1125 	}
1126 
1127 	if (stage == PHP_INI_STAGE_RUNTIME) {
1128 		status = php_output_get_status(TSRMLS_C);
1129 
1130 		if (value && (status & PHP_OUTPUT_WRITTEN)) {
1131 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
1132 			return FAILURE;
1133 		}
1134 		if (status & PHP_OUTPUT_SENT) {
1135 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
1136 			return FAILURE;
1137 		}
1138 	}
1139 
1140 	status = OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1141 
1142 	if (stage == PHP_INI_STAGE_RUNTIME && value) {
1143 		if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler") TSRMLS_CC)) {
1144 			php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler") TSRMLS_CC);
1145 		}
1146 	}
1147 
1148 	return status;
1149 }
1150 
1151 /*
1152  * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
1153  */
1154 
/* Start the tidy output handler under the given name, but only when
 * tidy.clean_output is enabled and the handler could be created. The handler
 * is created without a chunk size and with the standard handler flags. */
static void php_tidy_clean_output_start(const char *name, size_t name_len TSRMLS_DC)
{
	php_output_handler *handler;

	if (!TG(clean_output)) {
		return;
	}

	handler = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS TSRMLS_CC);
	if (handler) {
		php_output_handler_start(handler TSRMLS_CC);
	}
}
1163 
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags TSRMLS_DC)1164 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags TSRMLS_DC)
1165 {
1166 	if (chunk_size) {
1167 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
1168 		return NULL;
1169 	}
1170 	if (!TG(clean_output)) {
1171 		TG(clean_output) = 1;
1172 	}
1173 	return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags TSRMLS_CC);
1174 }
1175 
/* Output handler callback: run the complete buffered output through tidy.
 *
 * Only acts when clean_output is enabled and the invocation carries both the
 * START and FINAL flags, i.e. the whole output arrives in a single call. The
 * input is parsed and repaired with forced output, no tidy mark and the
 * default configuration applied; the serialized result is handed to the
 * output layer, which is told to free it.
 *
 * Returns SUCCESS when repaired output was produced, FAILURE otherwise. */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;
	PHP_OUTPUT_TSRMLS(output_context);

	if (!TG(clean_output)
		|| !(output_context->op & PHP_OUTPUT_HANDLER_START)
		|| !(output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		return status;
	}

	doc = tidyCreate();
	tidyBufInit(&errbuf);

	if (tidySetErrorBuffer(doc, &errbuf) == 0) {
		tidyOptSetBool(doc, TidyForceOutput, yes);
		tidyOptSetBool(doc, TidyMark, no);

		TIDY_SET_DEFAULT_CONFIG(doc);

		/* wrap the pending output without copying it */
		tidyBufInit(&inbuf);
		tidyBufAttach(&inbuf, (byte *) output_context->in.data, output_context->in.used);

		if (tidyParseBuffer(doc, &inbuf) >= 0 && tidyCleanAndRepair(doc) >= 0) {
			tidyBufInit(&outbuf);
			tidySaveBuffer(doc, &outbuf);
			/* overwrite the final byte with NUL; it is excluded from 'used' */
			FIX_BUFFER(&outbuf);

			output_context->out.data = (char *) outbuf.bp;
			output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
			output_context->out.free = 1;
			status = SUCCESS;
		}
	}

	tidyRelease(doc);
	tidyBufFree(&errbuf);

	return status;
}
1213 
1214 /* {{{ proto bool tidy_parse_string(string input [, mixed config_options [, string encoding]])
1215    Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1216 static PHP_FUNCTION(tidy_parse_string)
1217 {
1218 	char *input, *enc = NULL;
1219 	int input_len, enc_len = 0;
1220 	zval **options = NULL;
1221 	PHPTidyObj *obj;
1222 
1223 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|Zs", &input, &input_len, &options, &enc, &enc_len) == FAILURE) {
1224 		RETURN_FALSE;
1225 	}
1226 
1227 	tidy_instanciate(tidy_ce_doc, return_value TSRMLS_CC);
1228 	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
1229 
1230 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1231 
1232 	if(php_tidy_parse_string(obj, input, input_len, enc TSRMLS_CC) == FAILURE) {
1233 		zval_dtor(return_value);
1234 		INIT_ZVAL(*return_value);
1235 		RETURN_FALSE;
1236 	}
1237 }
1238 /* }}} */
1239 
1240 /* {{{ proto string tidy_get_error_buffer()
1241    Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1242 static PHP_FUNCTION(tidy_get_error_buffer)
1243 {
1244 	TIDY_FETCH_OBJECT;
1245 
1246 	if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1247 		RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1, 1);
1248 	} else {
1249 		RETURN_FALSE;
1250 	}
1251 }
1252 /* }}} */
1253 
1254 /* {{{ proto string tidy_get_output()
1255    Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1256 static PHP_FUNCTION(tidy_get_output)
1257 {
1258 	TidyBuffer output;
1259 	TIDY_FETCH_OBJECT;
1260 
1261 	tidyBufInit(&output);
1262 	tidySaveBuffer(obj->ptdoc->doc, &output);
1263 	FIX_BUFFER(&output);
1264 	RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0, 1);
1265 	tidyBufFree(&output);
1266 }
1267 /* }}} */
1268 
1269 /* {{{ proto boolean tidy_parse_file(string file [, mixed config_options [, string encoding [, bool use_include_path]]])
1270    Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1271 static PHP_FUNCTION(tidy_parse_file)
1272 {
1273 	char *inputfile, *enc = NULL;
1274 	int input_len, contents_len, enc_len = 0;
1275 	zend_bool use_include_path = 0;
1276 	char *contents;
1277 	zval **options = NULL;
1278 
1279 	PHPTidyObj *obj;
1280 
1281 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "p|Zsb", &inputfile, &input_len,
1282 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1283 		RETURN_FALSE;
1284 	}
1285 
1286 	tidy_instanciate(tidy_ce_doc, return_value TSRMLS_CC);
1287 	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
1288 
1289 	if (!(contents = php_tidy_file_to_mem(inputfile, use_include_path, &contents_len TSRMLS_CC))) {
1290 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot Load '%s' into memory%s", inputfile, (use_include_path) ? " (Using include path)" : "");
1291 		RETURN_FALSE;
1292 	}
1293 
1294 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1295 
1296 	if(php_tidy_parse_string(obj, contents, contents_len, enc TSRMLS_CC) == FAILURE) {
1297 		zval_dtor(return_value);
1298 		INIT_ZVAL(*return_value);
1299 		RETVAL_FALSE;
1300 	}
1301 
1302 	efree(contents);
1303 }
1304 /* }}} */
1305 
1306 /* {{{ proto boolean tidy_clean_repair()
1307    Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1308 static PHP_FUNCTION(tidy_clean_repair)
1309 {
1310 	TIDY_FETCH_OBJECT;
1311 
1312 	if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1313 		tidy_doc_update_properties(obj TSRMLS_CC);
1314 		RETURN_TRUE;
1315 	}
1316 
1317 	RETURN_FALSE;
1318 }
1319 /* }}} */
1320 
1321 /* {{{ proto boolean tidy_repair_string(string data [, mixed config_file [, string encoding]])
1322    Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1323 static PHP_FUNCTION(tidy_repair_string)
1324 {
1325 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, FALSE);
1326 }
1327 /* }}} */
1328 
1329 /* {{{ proto boolean tidy_repair_file(string filename [, mixed config_file [, string encoding [, bool use_include_path]]])
1330    Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1331 static PHP_FUNCTION(tidy_repair_file)
1332 {
1333 	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, TRUE);
1334 }
1335 /* }}} */
1336 
1337 /* {{{ proto boolean tidy_diagnose()
1338    Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1339 static PHP_FUNCTION(tidy_diagnose)
1340 {
1341 	TIDY_FETCH_OBJECT;
1342 
1343 	if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1344 		tidy_doc_update_properties(obj TSRMLS_CC);
1345 		RETURN_TRUE;
1346 	}
1347 
1348 	RETURN_FALSE;
1349 }
1350 /* }}} */
1351 
1352 /* {{{ proto string tidy_get_release()
1353    Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1354 static PHP_FUNCTION(tidy_get_release)
1355 {
1356 	if (zend_parse_parameters_none() == FAILURE) {
1357 		return;
1358 	}
1359 
1360 	RETURN_STRING((char *)tidyReleaseDate(), 1);
1361 }
1362 /* }}} */
1363 
1364 
#if HAVE_TIDYOPTGETDOC
/* {{{ proto string tidy_get_opt_doc(tidy resource, string optname)
   Returns the documentation for the given option name. Returns false (with a
   warning) for an unknown option, and false when libtidy has no documentation
   text for the option */
static PHP_FUNCTION(tidy_get_opt_doc)
{
	PHPTidyObj *obj;
	char *optval, *optname;
	int optname_len;
	int parsed;
	TidyOption opt;

	TIDY_SET_CONTEXT;

	/* method call: option name only; function call: tidy object first */
	if (object) {
		parsed = zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &optname, &optname_len);
	} else {
		parsed = zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len);
	}

	if (parsed == FAILURE) {
		RETURN_FALSE;
	}

	obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);

	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);

	if (!opt) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
		RETURN_FALSE;
	}

	optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt);
	if (!optval) {
		RETURN_FALSE;
	}

	RETURN_STRING(optval, 1);
}
/* }}} */
#endif
1404 
1405 
1406 /* {{{ proto array tidy_get_config()
1407    Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1408 static PHP_FUNCTION(tidy_get_config)
1409 {
1410 	TidyIterator itOpt;
1411 	char *opt_name;
1412 	void *opt_value;
1413 	TidyOptionType optt;
1414 
1415 	TIDY_FETCH_OBJECT;
1416 
1417 	itOpt = tidyGetOptionList(obj->ptdoc->doc);
1418 
1419 	array_init(return_value);
1420 
1421 	while (itOpt) {
1422 		TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1423 
1424 		opt_name = (char *)tidyOptGetName(opt);
1425 		opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt TSRMLS_CC);
1426 		switch (optt) {
1427 			case TidyString:
1428 				add_assoc_string(return_value, opt_name, (char*)opt_value, 0);
1429 				break;
1430 
1431 			case TidyInteger:
1432 				add_assoc_long(return_value, opt_name, (long)opt_value);
1433 				break;
1434 
1435 			case TidyBoolean:
1436 				add_assoc_bool(return_value, opt_name, (long)opt_value);
1437 				break;
1438 		}
1439 	}
1440 
1441 	return;
1442 }
1443 /* }}} */
1444 
1445 /* {{{ proto int tidy_get_status()
1446    Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1447 static PHP_FUNCTION(tidy_get_status)
1448 {
1449 	TIDY_FETCH_OBJECT;
1450 
1451 	RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1452 }
1453 /* }}} */
1454 
1455 /* {{{ proto int tidy_get_html_ver()
1456    Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1457 static PHP_FUNCTION(tidy_get_html_ver)
1458 {
1459 	TIDY_FETCH_OBJECT;
1460 
1461 	RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1462 }
1463 /* }}} */
1464 
1465 /* {{{ proto boolean tidy_is_xhtml()
1466    Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1467 static PHP_FUNCTION(tidy_is_xhtml)
1468 {
1469 	TIDY_FETCH_OBJECT;
1470 
1471 	RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1472 }
1473 /* }}} */
1474 
1475 /* {{{ proto boolean tidy_is_xml()
1476    Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1477 static PHP_FUNCTION(tidy_is_xml)
1478 {
1479 	TIDY_FETCH_OBJECT;
1480 
1481 	RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1482 }
1483 /* }}} */
1484 
1485 /* {{{ proto int tidy_error_count()
1486    Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1487 static PHP_FUNCTION(tidy_error_count)
1488 {
1489 	TIDY_FETCH_OBJECT;
1490 
1491 	RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1492 }
1493 /* }}} */
1494 
1495 /* {{{ proto int tidy_warning_count()
1496    Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1497 static PHP_FUNCTION(tidy_warning_count)
1498 {
1499 	TIDY_FETCH_OBJECT;
1500 
1501 	RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1502 }
1503 /* }}} */
1504 
1505 /* {{{ proto int tidy_access_count()
1506    Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1507 static PHP_FUNCTION(tidy_access_count)
1508 {
1509 	TIDY_FETCH_OBJECT;
1510 
1511 	RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1512 }
1513 /* }}} */
1514 
1515 /* {{{ proto int tidy_config_count()
1516    Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1517 static PHP_FUNCTION(tidy_config_count)
1518 {
1519 	TIDY_FETCH_OBJECT;
1520 
1521 	RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1522 }
1523 /* }}} */
1524 
1525 /* {{{ proto mixed tidy_getopt(string option)
1526    Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1527 static PHP_FUNCTION(tidy_getopt)
1528 {
1529 	PHPTidyObj *obj;
1530 	char *optname;
1531 	void *optval;
1532 	int optname_len;
1533 	TidyOption opt;
1534 	TidyOptionType optt;
1535 
1536 	TIDY_SET_CONTEXT;
1537 
1538 	if (object) {
1539 		if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &optname, &optname_len) == FAILURE) {
1540 			RETURN_FALSE;
1541 		}
1542 	} else {
1543 		if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, NULL, "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1544 			RETURN_FALSE;
1545 		}
1546 	}
1547 
1548 	obj = (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC);
1549 
1550 	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1551 
1552 	if (!opt) {
1553 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
1554 		RETURN_FALSE;
1555 	}
1556 
1557 	optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt TSRMLS_CC);
1558 	switch (optt) {
1559 		case TidyString:
1560 			RETURN_STRING((char *)optval, 0);
1561 			break;
1562 
1563 		case TidyInteger:
1564 			RETURN_LONG((long)optval);
1565 			break;
1566 
1567 		case TidyBoolean:
1568 			if (optval) {
1569 				RETURN_TRUE;
1570 			} else {
1571 				RETURN_FALSE;
1572 			}
1573 			break;
1574 
1575 		default:
1576 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to determine type of configuration option");
1577 			break;
1578 	}
1579 
1580 	RETURN_FALSE;
1581 }
1582 /* }}} */
1583 
TIDY_DOC_METHOD(__construct)1584 static TIDY_DOC_METHOD(__construct)
1585 {
1586 	char *inputfile = NULL, *enc = NULL;
1587 	int input_len = 0, enc_len = 0, contents_len = 0;
1588 	zend_bool use_include_path = 0;
1589 	char *contents;
1590 	zval **options = NULL;
1591 
1592 	PHPTidyObj *obj;
1593 	TIDY_SET_CONTEXT;
1594 
1595 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|pZsb", &inputfile, &input_len,
1596 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1597 		RETURN_FALSE;
1598 	}
1599 
1600 	obj = (PHPTidyObj *)zend_object_store_get_object(object TSRMLS_CC);
1601 
1602 	if (inputfile) {
1603 		if (!(contents = php_tidy_file_to_mem(inputfile, use_include_path, &contents_len TSRMLS_CC))) {
1604 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot Load '%s' into memory%s", inputfile, (use_include_path) ? " (Using include path)" : "");
1605 			return;
1606 		}
1607 
1608 		TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1609 
1610 		php_tidy_parse_string(obj, contents, contents_len, enc TSRMLS_CC);
1611 
1612 		efree(contents);
1613 	}
1614 }
1615 
TIDY_DOC_METHOD(parseFile)1616 static TIDY_DOC_METHOD(parseFile)
1617 {
1618 	char *inputfile, *enc = NULL;
1619 	int input_len, enc_len = 0, contents_len = 0;
1620 	zend_bool use_include_path = 0;
1621 	char *contents;
1622 	zval **options = NULL;
1623 	PHPTidyObj *obj;
1624 
1625 	TIDY_SET_CONTEXT;
1626 
1627 	obj = (PHPTidyObj *)zend_object_store_get_object(object TSRMLS_CC);
1628 
1629 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "p|Zsb", &inputfile, &input_len,
1630 							  &options, &enc, &enc_len, &use_include_path) == FAILURE) {
1631 		RETURN_FALSE;
1632 	}
1633 
1634 	if (!(contents = php_tidy_file_to_mem(inputfile, use_include_path, &contents_len TSRMLS_CC))) {
1635 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot Load '%s' into memory%s", inputfile, (use_include_path) ? " (Using include path)" : "");
1636 		RETURN_FALSE;
1637 	}
1638 
1639 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1640 
1641 	if(php_tidy_parse_string(obj, contents, contents_len, enc TSRMLS_CC) == FAILURE) {
1642 		RETVAL_FALSE;
1643 	} else {
1644 		RETVAL_TRUE;
1645 	}
1646 
1647 	efree(contents);
1648 }
1649 
TIDY_DOC_METHOD(parseString)1650 static TIDY_DOC_METHOD(parseString)
1651 {
1652 	char *input, *enc = NULL;
1653 	int input_len, enc_len = 0;
1654 	zval **options = NULL;
1655 	PHPTidyObj *obj;
1656 
1657 	TIDY_SET_CONTEXT;
1658 
1659 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|Zs", &input, &input_len, &options, &enc, &enc_len) == FAILURE) {
1660 		RETURN_FALSE;
1661 	}
1662 
1663 	obj = (PHPTidyObj *)zend_object_store_get_object(object TSRMLS_CC);
1664 
1665 	TIDY_APPLY_CONFIG_ZVAL(obj->ptdoc->doc, options);
1666 
1667 	if(php_tidy_parse_string(obj, input, input_len, enc TSRMLS_CC) == SUCCESS) {
1668 		RETURN_TRUE;
1669 	}
1670 
1671 	RETURN_FALSE;
1672 }
1673 
1674 
1675 /* {{{ proto TidyNode tidy_get_root()
1676    Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1677 static PHP_FUNCTION(tidy_get_root)
1678 {
1679 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1680 }
1681 /* }}} */
1682 
1683 /* {{{ proto TidyNode tidy_get_html()
1684    Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1685 static PHP_FUNCTION(tidy_get_html)
1686 {
1687 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1688 }
1689 /* }}} */
1690 
1691 /* {{{ proto TidyNode tidy_get_head()
1692    Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1693 static PHP_FUNCTION(tidy_get_head)
1694 {
1695 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1696 }
1697 /* }}} */
1698 
1699 /* {{{ proto TidyNode tidy_get_body(resource tidy)
1700    Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1701 static PHP_FUNCTION(tidy_get_body)
1702 {
1703 	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1704 }
1705 /* }}} */
1706 
1707 /* {{{ proto boolean tidyNode::hasChildren()
1708    Returns true if this node has children */
TIDY_NODE_METHOD(hasChildren)1709 static TIDY_NODE_METHOD(hasChildren)
1710 {
1711 	TIDY_FETCH_ONLY_OBJECT;
1712 
1713 	if (tidyGetChild(obj->node)) {
1714 		RETURN_TRUE;
1715 	} else {
1716 		RETURN_FALSE;
1717 	}
1718 }
1719 /* }}} */
1720 
1721 /* {{{ proto boolean tidyNode::hasSiblings()
1722    Returns true if this node has siblings */
TIDY_NODE_METHOD(hasSiblings)1723 static TIDY_NODE_METHOD(hasSiblings)
1724 {
1725 	TIDY_FETCH_ONLY_OBJECT;
1726 
1727 	if (obj->node && tidyGetNext(obj->node)) {
1728 		RETURN_TRUE;
1729 	} else {
1730 		RETURN_FALSE;
1731 	}
1732 }
1733 /* }}} */
1734 
1735 /* {{{ proto boolean tidyNode::isComment()
1736    Returns true if this node represents a comment */
TIDY_NODE_METHOD(isComment)1737 static TIDY_NODE_METHOD(isComment)
1738 {
1739 	TIDY_FETCH_ONLY_OBJECT;
1740 
1741 	if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1742 		RETURN_TRUE;
1743 	} else {
1744 		RETURN_FALSE;
1745 	}
1746 }
1747 /* }}} */
1748 
1749 /* {{{ proto boolean tidyNode::isHtml()
1750    Returns true if this node is part of a HTML document */
TIDY_NODE_METHOD(isHtml)1751 static TIDY_NODE_METHOD(isHtml)
1752 {
1753 	TIDY_FETCH_ONLY_OBJECT;
1754 
1755 	if (tidyNodeGetType(obj->node) & (TidyNode_Start | TidyNode_End | TidyNode_StartEnd)) {
1756 		RETURN_TRUE;
1757 	}
1758 
1759 	RETURN_FALSE;
1760 }
1761 /* }}} */
1762 
1763 /* {{{ proto boolean tidyNode::isText()
1764    Returns true if this node represents text (no markup) */
TIDY_NODE_METHOD(isText)1765 static TIDY_NODE_METHOD(isText)
1766 {
1767 	TIDY_FETCH_ONLY_OBJECT;
1768 
1769 	if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1770 		RETURN_TRUE;
1771 	} else {
1772 		RETURN_FALSE;
1773 	}
1774 }
1775 /* }}} */
1776 
1777 /* {{{ proto boolean tidyNode::isJste()
1778    Returns true if this node is JSTE */
TIDY_NODE_METHOD(isJste)1779 static TIDY_NODE_METHOD(isJste)
1780 {
1781 	TIDY_FETCH_ONLY_OBJECT;
1782 
1783 	if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1784 		RETURN_TRUE;
1785 	} else {
1786 		RETURN_FALSE;
1787 	}
1788 }
1789 /* }}} */
1790 
1791 /* {{{ proto boolean tidyNode::isAsp()
1792    Returns true if this node is ASP */
TIDY_NODE_METHOD(isAsp)1793 static TIDY_NODE_METHOD(isAsp)
1794 {
1795 	TIDY_FETCH_ONLY_OBJECT;
1796 
1797 	if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1798 		RETURN_TRUE;
1799 	} else {
1800 		RETURN_FALSE;
1801 	}
1802 }
1803 /* }}} */
1804 
1805 /* {{{ proto boolean tidyNode::isPhp()
1806    Returns true if this node is PHP */
TIDY_NODE_METHOD(isPhp)1807 static TIDY_NODE_METHOD(isPhp)
1808 {
1809 	TIDY_FETCH_ONLY_OBJECT;
1810 
1811 	if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1812 		RETURN_TRUE;
1813 	} else {
1814 		RETURN_FALSE;
1815 	}
1816 }
1817 /* }}} */
1818 
1819 /* {{{ proto tidyNode tidyNode::getParent()
1820    Returns the parent node if available or NULL */
TIDY_NODE_METHOD(getParent)1821 static TIDY_NODE_METHOD(getParent)
1822 {
1823 	TidyNode	parent_node;
1824 	PHPTidyObj *newobj;
1825 	TIDY_FETCH_ONLY_OBJECT;
1826 
1827 	parent_node = tidyGetParent(obj->node);
1828 	if(parent_node) {
1829 		tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
1830 		newobj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
1831 		newobj->node = parent_node;
1832 		newobj->type = is_node;
1833 		newobj->ptdoc = obj->ptdoc;
1834 		newobj->ptdoc->ref_count++;
1835 		tidy_add_default_properties(newobj, is_node TSRMLS_CC);
1836 	} else {
1837 		ZVAL_NULL(return_value);
1838 	}
1839 }
1840 /* }}} */
1841 
1842 
1843 /* {{{ proto void tidyNode::__construct()
1844          __constructor for tidyNode. */
TIDY_NODE_METHOD(__construct)1845 static TIDY_NODE_METHOD(__construct)
1846 {
1847 	php_error_docref(NULL TSRMLS_CC, E_ERROR, "You should not create a tidyNode manually");
1848 }
1849 /* }}} */
1850 
/* Registers the node-type constants; called once from PHP_MINIT_FUNCTION(tidy).
 * Each TIDY_NODE_CONST(NAME, Type) line pairs an upper-case PHP-side name with
 * a libtidy node type suffix.
 * NOTE(review): TIDY_NODE_CONST is defined elsewhere in this file; presumably
 * it registers TIDY_NODETYPE_<NAME> with the value TidyNode_<Type> - confirm
 * against the macro definition. */
_php_tidy_register_nodetypes(INIT_FUNC_ARGS)1851 static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
1852 {
1853 	TIDY_NODE_CONST(ROOT, Root);
1854 	TIDY_NODE_CONST(DOCTYPE, DocType);
1855 	TIDY_NODE_CONST(COMMENT, Comment);
1856 	TIDY_NODE_CONST(PROCINS, ProcIns);
1857 	TIDY_NODE_CONST(TEXT, Text);
1858 	TIDY_NODE_CONST(START, Start);
1859 	TIDY_NODE_CONST(END, End);
1860 	TIDY_NODE_CONST(STARTEND, StartEnd);
1861 	TIDY_NODE_CONST(CDATA, CDATA);
1862 	TIDY_NODE_CONST(SECTION, Section);
1863 	TIDY_NODE_CONST(ASP, Asp);
1864 	TIDY_NODE_CONST(JSTE, Jste);
1865 	TIDY_NODE_CONST(PHP, Php);
1866 	TIDY_NODE_CONST(XMLDECL, XmlDecl);
1867 }
1868 
/* Registers the tag constants; called once from PHP_MINIT_FUNCTION(tidy).
 * There is one TIDY_TAG_CONST(NAME) line per tag, listed alphabetically after
 * the leading UNKNOWN entry.
 * NOTE(review): TIDY_TAG_CONST is defined elsewhere in this file; presumably
 * it registers TIDY_TAG_<NAME> with the value of libtidy's TidyTag_<NAME> -
 * confirm against the macro definition. */
_php_tidy_register_tags(INIT_FUNC_ARGS)1869 static void _php_tidy_register_tags(INIT_FUNC_ARGS)
1870 {
1871 	TIDY_TAG_CONST(UNKNOWN);
1872 	TIDY_TAG_CONST(A);
1873 	TIDY_TAG_CONST(ABBR);
1874 	TIDY_TAG_CONST(ACRONYM);
1875 	TIDY_TAG_CONST(ADDRESS);
1876 	TIDY_TAG_CONST(ALIGN);
1877 	TIDY_TAG_CONST(APPLET);
1878 	TIDY_TAG_CONST(AREA);
1879 	TIDY_TAG_CONST(B);
1880 	TIDY_TAG_CONST(BASE);
1881 	TIDY_TAG_CONST(BASEFONT);
1882 	TIDY_TAG_CONST(BDO);
1883 	TIDY_TAG_CONST(BGSOUND);
1884 	TIDY_TAG_CONST(BIG);
1885 	TIDY_TAG_CONST(BLINK);
1886 	TIDY_TAG_CONST(BLOCKQUOTE);
1887 	TIDY_TAG_CONST(BODY);
1888 	TIDY_TAG_CONST(BR);
1889 	TIDY_TAG_CONST(BUTTON);
1890 	TIDY_TAG_CONST(CAPTION);
1891 	TIDY_TAG_CONST(CENTER);
1892 	TIDY_TAG_CONST(CITE);
1893 	TIDY_TAG_CONST(CODE);
1894 	TIDY_TAG_CONST(COL);
1895 	TIDY_TAG_CONST(COLGROUP);
1896 	TIDY_TAG_CONST(COMMENT);
1897 	TIDY_TAG_CONST(DD);
1898 	TIDY_TAG_CONST(DEL);
1899 	TIDY_TAG_CONST(DFN);
1900 	TIDY_TAG_CONST(DIR);
1901 	TIDY_TAG_CONST(DIV);
1902 	TIDY_TAG_CONST(DL);
1903 	TIDY_TAG_CONST(DT);
1904 	TIDY_TAG_CONST(EM);
1905 	TIDY_TAG_CONST(EMBED);
1906 	TIDY_TAG_CONST(FIELDSET);
1907 	TIDY_TAG_CONST(FONT);
1908 	TIDY_TAG_CONST(FORM);
1909 	TIDY_TAG_CONST(FRAME);
1910 	TIDY_TAG_CONST(FRAMESET);
1911 	TIDY_TAG_CONST(H1);
1912 	TIDY_TAG_CONST(H2);
1913 	TIDY_TAG_CONST(H3);
1914 	TIDY_TAG_CONST(H4);
1915 	TIDY_TAG_CONST(H5);
1916 	TIDY_TAG_CONST(H6);
1917 	TIDY_TAG_CONST(HEAD);
1918 	TIDY_TAG_CONST(HR);
1919 	TIDY_TAG_CONST(HTML);
1920 	TIDY_TAG_CONST(I);
1921 	TIDY_TAG_CONST(IFRAME);
1922 	TIDY_TAG_CONST(ILAYER);
1923 	TIDY_TAG_CONST(IMG);
1924 	TIDY_TAG_CONST(INPUT);
1925 	TIDY_TAG_CONST(INS);
1926 	TIDY_TAG_CONST(ISINDEX);
1927 	TIDY_TAG_CONST(KBD);
1928 	TIDY_TAG_CONST(KEYGEN);
1929 	TIDY_TAG_CONST(LABEL);
1930 	TIDY_TAG_CONST(LAYER);
1931 	TIDY_TAG_CONST(LEGEND);
1932 	TIDY_TAG_CONST(LI);
1933 	TIDY_TAG_CONST(LINK);
1934 	TIDY_TAG_CONST(LISTING);
1935 	TIDY_TAG_CONST(MAP);
1936 	TIDY_TAG_CONST(MARQUEE);
1937 	TIDY_TAG_CONST(MENU);
1938 	TIDY_TAG_CONST(META);
1939 	TIDY_TAG_CONST(MULTICOL);
1940 	TIDY_TAG_CONST(NOBR);
1941 	TIDY_TAG_CONST(NOEMBED);
1942 	TIDY_TAG_CONST(NOFRAMES);
1943 	TIDY_TAG_CONST(NOLAYER);
1944 	TIDY_TAG_CONST(NOSAVE);
1945 	TIDY_TAG_CONST(NOSCRIPT);
1946 	TIDY_TAG_CONST(OBJECT);
1947 	TIDY_TAG_CONST(OL);
1948 	TIDY_TAG_CONST(OPTGROUP);
1949 	TIDY_TAG_CONST(OPTION);
1950 	TIDY_TAG_CONST(P);
1951 	TIDY_TAG_CONST(PARAM);
1952 	TIDY_TAG_CONST(PLAINTEXT);
1953 	TIDY_TAG_CONST(PRE);
1954 	TIDY_TAG_CONST(Q);
1955 	TIDY_TAG_CONST(RB);
1956 	TIDY_TAG_CONST(RBC);
1957 	TIDY_TAG_CONST(RP);
1958 	TIDY_TAG_CONST(RT);
1959 	TIDY_TAG_CONST(RTC);
1960 	TIDY_TAG_CONST(RUBY);
1961 	TIDY_TAG_CONST(S);
1962 	TIDY_TAG_CONST(SAMP);
1963 	TIDY_TAG_CONST(SCRIPT);
1964 	TIDY_TAG_CONST(SELECT);
1965 	TIDY_TAG_CONST(SERVER);
1966 	TIDY_TAG_CONST(SERVLET);
1967 	TIDY_TAG_CONST(SMALL);
1968 	TIDY_TAG_CONST(SPACER);
1969 	TIDY_TAG_CONST(SPAN);
1970 	TIDY_TAG_CONST(STRIKE);
1971 	TIDY_TAG_CONST(STRONG);
1972 	TIDY_TAG_CONST(STYLE);
1973 	TIDY_TAG_CONST(SUB);
1974 	TIDY_TAG_CONST(SUP);
1975 	TIDY_TAG_CONST(TABLE);
1976 	TIDY_TAG_CONST(TBODY);
1977 	TIDY_TAG_CONST(TD);
1978 	TIDY_TAG_CONST(TEXTAREA);
1979 	TIDY_TAG_CONST(TFOOT);
1980 	TIDY_TAG_CONST(TH);
1981 	TIDY_TAG_CONST(THEAD);
1982 	TIDY_TAG_CONST(TITLE);
1983 	TIDY_TAG_CONST(TR);
1984 	TIDY_TAG_CONST(TT);
1985 	TIDY_TAG_CONST(U);
1986 	TIDY_TAG_CONST(UL);
1987 	TIDY_TAG_CONST(VAR);
1988 	TIDY_TAG_CONST(WBR);
1989 	TIDY_TAG_CONST(XMP);
1990 }
1991 
1992 #endif
1993 
1994 /*
1995  * Local variables:
1996  * tab-width: 4
1997  * c-basic-offset: 4
1998  * End:
1999  * vim600: noet sw=4 ts=4 fdm=marker
2000  * vim<600: noet sw=4 ts=4
2001  */
2002