xref: /PHP-7.4/ext/tidy/tidy.c (revision 221345a0)
12fb97cdfSJohn Coggeshall /*
22fb97cdfSJohn Coggeshall   +----------------------------------------------------------------------+
3d0cb7153SJohannes Schlüter   | PHP Version 7                                                        |
42fb97cdfSJohn Coggeshall   +----------------------------------------------------------------------+
50cf7de1cSZeev Suraski   | Copyright (c) The PHP Group                                          |
62fb97cdfSJohn Coggeshall   +----------------------------------------------------------------------+
75bd93221Sfoobar   | This source file is subject to version 3.01 of the PHP license,      |
82fb97cdfSJohn Coggeshall   | that is bundled with this package in the file LICENSE, and is        |
92fb97cdfSJohn Coggeshall   | available through the world-wide-web at the following url:           |
105bd93221Sfoobar   | http://www.php.net/license/3_01.txt                                  |
112fb97cdfSJohn Coggeshall   | If you did not receive a copy of the PHP license and are unable to   |
122fb97cdfSJohn Coggeshall   | obtain it through the world-wide-web, please send a note to          |
132fb97cdfSJohn Coggeshall   | license@php.net so we can mail you a copy immediately.               |
142fb97cdfSJohn Coggeshall   +----------------------------------------------------------------------+
152fb97cdfSJohn Coggeshall   | Author: John Coggeshall <john@php.net>                               |
162fb97cdfSJohn Coggeshall   +----------------------------------------------------------------------+
172fb97cdfSJohn Coggeshall */
182fb97cdfSJohn Coggeshall 
192fb97cdfSJohn Coggeshall #ifdef HAVE_CONFIG_H
202fb97cdfSJohn Coggeshall #include "config.h"
212fb97cdfSJohn Coggeshall #endif
222fb97cdfSJohn Coggeshall 
232fb97cdfSJohn Coggeshall #include "php.h"
24a7b75e20SJohn Coggeshall #include "php_tidy.h"
2548200cbeSfoobar 
2648200cbeSfoobar #if HAVE_TIDY
2748200cbeSfoobar 
282fb97cdfSJohn Coggeshall #include "php_ini.h"
292fb97cdfSJohn Coggeshall #include "ext/standard/info.h"
309b13b215Sfoobar 
31e7da9685SDavid Carlier #if HAVE_TIDY_H
32c9d3f325Sfoobar #include "tidy.h"
33e7da9685SDavid Carlier #elif HAVE_TIDYP_H
34e7da9685SDavid Carlier #include "tidyp.h"
35e7da9685SDavid Carlier #endif
36a552ac5bSMichael Orlitzky 
37a552ac5bSMichael Orlitzky #if HAVE_TIDYBUFFIO_H
38a552ac5bSMichael Orlitzky #include "tidybuffio.h"
39a552ac5bSMichael Orlitzky #else
40c9d3f325Sfoobar #include "buffio.h"
41a552ac5bSMichael Orlitzky #endif
429b13b215Sfoobar 
/*
 * Compatibility with older versions of libtidy: when the included headers
 * did not define TIDY_CALL, define it as empty.
 */
#if !defined(TIDY_CALL)
# define TIDY_CALL
#endif
47227ca46fSNuno Lopes 
489b13b215Sfoobar /* {{{ ext/tidy macros
499b13b215Sfoobar */
/*
 * Overwrite the final byte of a non-empty buffer with a NUL terminator.
 * `bptr` must point to an object exposing `size` and `bp` members. It is
 * evaluated more than once, so pass an expression without side effects.
 */
#define FIX_BUFFER(bptr) \
	do { \
		if ((bptr)->size != 0) { \
			(bptr)->bp[(bptr)->size - 1] = '\0'; \
		} \
	} while (0)
5111d24c15SMichael Wallner 
/* Declares a local `object` initialised with the result of getThis(). */
#define TIDY_SET_CONTEXT zval *object = getThis();
5496b9fb07SNuno Lopes 
/*
 * Declares the locals `obj` and (via TIDY_SET_CONTEXT) `object`.
 *
 * - When getThis() yields no object, a single tidy_ce_doc instance is parsed
 *   as the sole argument; on parse failure the enclosing function does
 *   RETURN_FALSE.
 * - Otherwise no parameters are accepted; on parse failure the enclosing
 *   function returns without setting a value.
 *
 * Finally `obj` is obtained from `object` through Z_TIDY_P.
 */
#define TIDY_FETCH_OBJECT \
	PHPTidyObj *obj; \
	TIDY_SET_CONTEXT; \
	if (!object) { \
		if (zend_parse_method_parameters(ZEND_NUM_ARGS(), NULL, "O", &object, tidy_ce_doc) == FAILURE) { \
			RETURN_FALSE; \
		} \
	} else if (zend_parse_parameters_none() == FAILURE) { \
		return; \
	} \
	obj = Z_TIDY_P(object);
689b13b215Sfoobar 
/*
 * Same as TIDY_FETCH_OBJECT, but additionally throws an Error and returns
 * from the enclosing function when the fetched object's document has its
 * `initialized` flag cleared.
 */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (obj->ptdoc->initialized == 0) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}
75d4bf0799SNikita Popov 
/*
 * Declares the locals `obj` and (via TIDY_SET_CONTEXT) `object`, rejects any
 * passed parameters (returning from the enclosing function on failure) and
 * then derives `obj` from `object` through Z_TIDY_P.
 */
#define TIDY_FETCH_ONLY_OBJECT \
	PHPTidyObj *obj; \
	TIDY_SET_CONTEXT; \
	if (FAILURE == zend_parse_parameters_none()) { \
		return; \
	} \
	obj = Z_TIDY_P(object);
839b13b215Sfoobar 
/*
 * Apply a user supplied configuration to `_doc`; does nothing when `_val` is
 * NULL.
 *
 * - An array is handed to _php_tidy_apply_config_array().
 * - Anything else is converted to a string and treated as the path of a
 *   configuration file: it is first passed to TIDY_OPEN_BASE_DIR_CHECK (which
 *   may RETURN_FALSE from the enclosing function) and then loaded with
 *   tidyLoadConfig(). A result of -1 raises a warning, a result of 1 raises a
 *   notice; other results are silent.
 *
 * Expands to a bare `if` statement, so do not follow it with an `else`.
 */
#define TIDY_APPLY_CONFIG_ZVAL(_doc, _val) \
	if (_val) { \
		if (Z_TYPE_P(_val) != IS_ARRAY) { \
			convert_to_string_ex(_val); \
			TIDY_OPEN_BASE_DIR_CHECK(Z_STRVAL_P(_val)); \
			switch (tidyLoadConfig(_doc, Z_STRVAL_P(_val))) { \
				case 1: \
					php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file '%s'", Z_STRVAL_P(_val)); \
					break; \
				case -1: \
					php_error_docref(NULL, E_WARNING, "Could not load configuration file '%s'", Z_STRVAL_P(_val)); \
					break; \
				default: \
					break; \
			} \
		} else { \
			_php_tidy_apply_config_array(_doc, Z_ARRVAL_P(_val)); \
		} \
	}
1019b13b215Sfoobar 
/*
 * Register an internal class.
 *
 *   classname  class name as seen from userland (stringified)
 *   name       suffix selecting tidy_funcs_<name>, tidy_object_new_<name>,
 *              tidy_ce_<name> and tidy_object_handlers_<name>
 *   parent     parent class entry passed to zend_register_internal_class_ex()
 *   __flags    extra bits OR-ed into the new class entry's ce_flags
 *
 * The per-class handler table starts as a copy of std_object_handlers with
 * clone_obj reset to NULL.
 */
#define REGISTER_TIDY_CLASS(classname, name, parent, __flags) \
	{ \
		zend_class_entry ce; \
		\
		INIT_CLASS_ENTRY(ce, # classname, tidy_funcs_ ## name); \
		ce.create_object = tidy_object_new_ ## name; \
		\
		tidy_ce_ ## name = zend_register_internal_class_ex(&ce, parent); \
		tidy_ce_ ## name->ce_flags |= __flags; \
		\
		memcpy(&tidy_object_handlers_ ## name, &std_object_handlers, sizeof(zend_object_handlers)); \
		tidy_object_handlers_ ## name.clone_obj = NULL; \
	}
1129b13b215Sfoobar 
/* Registers TIDY_TAG_<tag> as a persistent, case-sensitive long constant with value TidyTag_<tag>. */
#define TIDY_TAG_CONST(tag) \
	REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)

/* Registers TIDY_NODETYPE_<name> as a persistent, case-sensitive long constant with value TidyNode_<type>. */
#define TIDY_NODE_CONST(name, type) \
	REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)
1159b13b215Sfoobar 
/* Provide TRUE / FALSE when no earlier header has defined them. */
#if !defined(TRUE)
# define TRUE 1
#endif

#if !defined(FALSE)
# define FALSE 0
#endif
1239b13b215Sfoobar 
/*
 * Store `_string` in hash table `_table` under the stringified `_key`.
 * A NULL `_string` is stored as an empty string. Any existing entry for the
 * key is updated via zend_hash_str_update().
 */
#define ADD_PROPERTY_STRING(_table, _key, _string) \
	{ \
		zval tmp; \
		if (!(_string)) { \
			ZVAL_EMPTY_STRING(&tmp); \
		} else { \
			ZVAL_STRING(&tmp, (char *)_string); \
		} \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}
1349b13b215Sfoobar 
/*
 * Length-aware variant of ADD_PROPERTY_STRING: stores the first `_len` bytes
 * of `_string` under the stringified `_key`, or an empty string when
 * `_string` is NULL.
 */
#define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
	{ \
		zval tmp; \
		if (!(_string)) { \
			ZVAL_EMPTY_STRING(&tmp); \
		} else { \
			ZVAL_STRINGL(&tmp, (char *)_string, _len); \
		} \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}
145a329ecd8SMichael Wallner 
/* Store the integer `_long` in `_table` under the stringified `_key`. */
#define ADD_PROPERTY_LONG(_table, _key, _long) \
	{ \
		zval tmp; \
		\
		ZVAL_LONG(&tmp, _long); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}
1529b13b215Sfoobar 
/* Store a NULL value in `_table` under the stringified `_key`. */
#define ADD_PROPERTY_NULL(_table, _key) \
	{ \
		zval tmp; \
		\
		ZVAL_NULL(&tmp); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}
1599b13b215Sfoobar 
/* Store the boolean `_bool` in `_table` under the stringified `_key`. */
#define ADD_PROPERTY_BOOL(_table, _key, _bool) \
	{ \
		zval tmp; \
		\
		ZVAL_BOOL(&tmp, _bool); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}
166c590531cSJohn Coggeshall 
/*
 * Makes the enclosing function RETURN_FALSE when php_check_open_basedir()
 * reports a non-zero result for `filename`.
 */
#define TIDY_OPEN_BASE_DIR_CHECK(filename) \
	if (php_check_open_basedir(filename) != 0) { \
		RETURN_FALSE; \
	}
1719b13b215Sfoobar 
/*
 * When the `default_config` module global is a non-empty string, load it into
 * `_doc` with tidyLoadConfig() and raise a warning if that call returns a
 * negative value.
 */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0] != '\0') { \
		if (tidyLoadConfig(_doc, TG(default_config)) < 0) { \
			php_error_docref(NULL, E_WARNING, "Unable to load Tidy configuration file at '%s'.", TG(default_config)); \
		} \
	}
1789b13b215Sfoobar /* }}} */
1799b13b215Sfoobar 
180b7a7b1a6SStanislav Malyshev /* {{{ ext/tidy structs
1819b13b215Sfoobar */
/* Forward declarations; the structures themselves are defined below. */
typedef struct _PHPTidyDoc PHPTidyDoc;
typedef struct _PHPTidyObj PHPTidyObj;

/* Discriminates what a PHPTidyObj stands for. */
typedef enum {
	is_node,	/* value 0 */
	is_doc		/* value 1 */
} tidy_obj_type;

/* Selector passed to php_tidy_create_node(). */
typedef enum {
	is_root_node,
	is_html_node,
	is_head_node,
	is_body_node
} tidy_base_nodetypes;
1969b13b215Sfoobar 
1979b13b215Sfoobar struct _PHPTidyDoc {
19811d24c15SMichael Wallner 	TidyDoc			doc;
19911d24c15SMichael Wallner 	TidyBuffer		*errbuf;
20011d24c15SMichael Wallner 	unsigned int	ref_count;
201206f123bSAntony Dovgal 	unsigned int    initialized:1;
2029b13b215Sfoobar };
2039b13b215Sfoobar 
2049b13b215Sfoobar struct _PHPTidyObj {
20511d24c15SMichael Wallner 	TidyNode		node;
20611d24c15SMichael Wallner 	tidy_obj_type	type;
20711d24c15SMichael Wallner 	PHPTidyDoc		*ptdoc;
2080f6c93d6SXinchen Hui 	zend_object		std;
2099b13b215Sfoobar };
2100f6c93d6SXinchen Hui 
php_tidy_fetch_object(zend_object * obj)2110f6c93d6SXinchen Hui static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
2120f6c93d6SXinchen Hui 	return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
2130f6c93d6SXinchen Hui }
2140f6c93d6SXinchen Hui 
2150f6c93d6SXinchen Hui #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
2169b13b215Sfoobar /* }}} */
2179b13b215Sfoobar 
2189b13b215Sfoobar /* {{{ ext/tidy prototypes
2199b13b215Sfoobar */
220bdeb220fSAnatol Belski static zend_string *php_tidy_file_to_mem(char *, zend_bool);
221bdeb220fSAnatol Belski static void tidy_object_free_storage(zend_object *);
222bdeb220fSAnatol Belski static zend_object *tidy_object_new_node(zend_class_entry *);
223bdeb220fSAnatol Belski static zend_object *tidy_object_new_doc(zend_class_entry *);
224bdeb220fSAnatol Belski static zval * tidy_instanciate(zend_class_entry *, zval *);
225bdeb220fSAnatol Belski static int tidy_doc_cast_handler(zval *, zval *, int);
226bdeb220fSAnatol Belski static int tidy_node_cast_handler(zval *, zval *, int);
227bdeb220fSAnatol Belski static void tidy_doc_update_properties(PHPTidyObj *);
228bdeb220fSAnatol Belski static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type);
229bdeb220fSAnatol Belski static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
2309b13b215Sfoobar static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
231bdeb220fSAnatol Belski static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
232bdeb220fSAnatol Belski static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
2339b13b215Sfoobar static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
2349b13b215Sfoobar static void _php_tidy_register_tags(INIT_FUNC_ARGS);
23511d24c15SMichael Wallner static PHP_INI_MH(php_tidy_set_clean_output);
236bdeb220fSAnatol Belski static void php_tidy_clean_output_start(const char *name, size_t name_len);
237bdeb220fSAnatol Belski static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
23811d24c15SMichael Wallner static int php_tidy_output_handler(void **nothing, php_output_context *output_context);
239ddb4d1fdSNuno Lopes 
240ddb4d1fdSNuno Lopes static PHP_MINIT_FUNCTION(tidy);
241ddb4d1fdSNuno Lopes static PHP_MSHUTDOWN_FUNCTION(tidy);
242ddb4d1fdSNuno Lopes static PHP_RINIT_FUNCTION(tidy);
243*221345a0SChristoph M. Becker static PHP_RSHUTDOWN_FUNCTION(tidy);
244ddb4d1fdSNuno Lopes static PHP_MINFO_FUNCTION(tidy);
245ddb4d1fdSNuno Lopes 
246ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_getopt);
247ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_parse_string);
248ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_parse_file);
249ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_clean_repair);
250ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_repair_string);
251ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_repair_file);
252ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_diagnose);
253ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_output);
254ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_error_buffer);
255ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_release);
256ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_config);
257ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_status);
258ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_html_ver);
259ddb4d1fdSNuno Lopes #if HAVE_TIDYOPTGETDOC
260ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_opt_doc);
261ddb4d1fdSNuno Lopes #endif
262ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_is_xhtml);
263ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_is_xml);
264ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_error_count);
265ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_warning_count);
266ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_access_count);
267ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_config_count);
268ddb4d1fdSNuno Lopes 
269ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_root);
270ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_html);
271ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_head);
272ddb4d1fdSNuno Lopes static PHP_FUNCTION(tidy_get_body);
273ddb4d1fdSNuno Lopes 
274ddb4d1fdSNuno Lopes static TIDY_DOC_METHOD(__construct);
275ddb4d1fdSNuno Lopes static TIDY_DOC_METHOD(parseFile);
276ddb4d1fdSNuno Lopes static TIDY_DOC_METHOD(parseString);
277ddb4d1fdSNuno Lopes 
278ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(hasChildren);
279ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(hasSiblings);
280ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(isComment);
281ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(isHtml);
282ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(isText);
283ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(isJste);
284ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(isAsp);
285ddb4d1fdSNuno Lopes static TIDY_NODE_METHOD(isPhp);
2868b9f475aSNuno Lopes static TIDY_NODE_METHOD(getParent);
287832eb472SPierrick Charron static TIDY_NODE_METHOD(__construct);
2889b13b215Sfoobar /* }}} */
2899b13b215Sfoobar 
2903ab29352SJohn Coggeshall ZEND_DECLARE_MODULE_GLOBALS(tidy)
2914cd101abSIlia Alshanetsky 
2923ab29352SJohn Coggeshall PHP_INI_BEGIN()
29311d24c15SMichael Wallner STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
29411d24c15SMichael Wallner STD_PHP_INI_ENTRY("tidy.clean_output",		"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
2953ab29352SJohn Coggeshall PHP_INI_END()
2962fb97cdfSJohn Coggeshall 
297bedd3238SFelipe Pena /* {{{ arginfo */
298bedd3238SFelipe Pena ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_string, 0, 0, 1)
299bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, input)
300bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, config_options)
301bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, encoding)
302bedd3238SFelipe Pena ZEND_END_ARG_INFO()
303bedd3238SFelipe Pena 
30478ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_error_buffer, 0, 0, 1)
30578ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
306bedd3238SFelipe Pena ZEND_END_ARG_INFO()
307bedd3238SFelipe Pena 
30878ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_output, 0, 0, 1)
30978ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
310bedd3238SFelipe Pena ZEND_END_ARG_INFO()
311bedd3238SFelipe Pena 
312bedd3238SFelipe Pena ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_parse_file, 0, 0, 1)
313bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, file)
314bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, config_options)
315bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, encoding)
316bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, use_include_path)
317bedd3238SFelipe Pena ZEND_END_ARG_INFO()
318bedd3238SFelipe Pena 
31978ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_clean_repair, 0, 0, 1)
32078ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
321bedd3238SFelipe Pena ZEND_END_ARG_INFO()
322bedd3238SFelipe Pena 
323bedd3238SFelipe Pena ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_string, 0, 0, 1)
324bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, data)
325bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, config_file)
326bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, encoding)
327bedd3238SFelipe Pena ZEND_END_ARG_INFO()
328bedd3238SFelipe Pena 
329bedd3238SFelipe Pena ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_repair_file, 0, 0, 1)
330bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, filename)
331bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, config_file)
332bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, encoding)
333bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, use_include_path)
334bedd3238SFelipe Pena ZEND_END_ARG_INFO()
335bedd3238SFelipe Pena 
33678ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_diagnose, 0, 0, 1)
33778ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
338bedd3238SFelipe Pena ZEND_END_ARG_INFO()
339bedd3238SFelipe Pena 
340bedd3238SFelipe Pena ZEND_BEGIN_ARG_INFO(arginfo_tidy_get_release, 0)
341bedd3238SFelipe Pena ZEND_END_ARG_INFO()
342bedd3238SFelipe Pena 
343bedd3238SFelipe Pena #if HAVE_TIDYOPTGETDOC
344bedd3238SFelipe Pena ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc, 0, 0, 2)
345bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, resource)
346bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, optname)
347bedd3238SFelipe Pena ZEND_END_ARG_INFO()
34897353cdaSGabriel Caruso 
34997353cdaSGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_opt_doc_method, 0, 0, 1)
35097353cdaSGabriel Caruso 	ZEND_ARG_INFO(0, optname)
35197353cdaSGabriel Caruso ZEND_END_ARG_INFO()
352bedd3238SFelipe Pena #endif
353bedd3238SFelipe Pena 
35478ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_config, 0, 0, 1)
35578ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
356bedd3238SFelipe Pena ZEND_END_ARG_INFO()
357bedd3238SFelipe Pena 
35878ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_status, 0, 0, 1)
35978ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
360bedd3238SFelipe Pena ZEND_END_ARG_INFO()
361bedd3238SFelipe Pena 
36278ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_html_ver, 0, 0, 1)
36378ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
364bedd3238SFelipe Pena ZEND_END_ARG_INFO()
365bedd3238SFelipe Pena 
36678ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xhtml, 0, 0, 1)
36778ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
368bedd3238SFelipe Pena ZEND_END_ARG_INFO()
369bedd3238SFelipe Pena 
37078ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_is_xml, 0, 0, 1)
37178ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
372bedd3238SFelipe Pena ZEND_END_ARG_INFO()
373bedd3238SFelipe Pena 
37478ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_error_count, 0, 0, 1)
37578ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
376bedd3238SFelipe Pena ZEND_END_ARG_INFO()
377bedd3238SFelipe Pena 
37878ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_warning_count, 0, 0, 1)
37978ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
380bedd3238SFelipe Pena ZEND_END_ARG_INFO()
381bedd3238SFelipe Pena 
38278ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_access_count, 0, 0, 1)
38378ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
384bedd3238SFelipe Pena ZEND_END_ARG_INFO()
385bedd3238SFelipe Pena 
38678ed55d7SGabriel Caruso ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_config_count, 0, 0, 1)
38778ed55d7SGabriel Caruso     ZEND_ARG_INFO(0, object)
388bedd3238SFelipe Pena ZEND_END_ARG_INFO()
389bedd3238SFelipe Pena 
3908588a458STyson Andre ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt_method, 0, 0, 1)
391bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, option)
392bedd3238SFelipe Pena ZEND_END_ARG_INFO()
393bedd3238SFelipe Pena 
3948588a458STyson Andre ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_getopt, 0, 0, 2)
3958588a458STyson Andre 	ZEND_ARG_INFO(0, object)
3968588a458STyson Andre 	ZEND_ARG_INFO(0, option)
3978588a458STyson Andre ZEND_END_ARG_INFO()
3988588a458STyson Andre 
3998588a458STyson Andre ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_root, 0, 0, 1)
4008588a458STyson Andre 	ZEND_ARG_INFO(0, object)
401bedd3238SFelipe Pena ZEND_END_ARG_INFO()
402bedd3238SFelipe Pena 
4038588a458STyson Andre ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_html, 0, 0, 1)
4048588a458STyson Andre 	ZEND_ARG_INFO(0, object)
405bedd3238SFelipe Pena ZEND_END_ARG_INFO()
406bedd3238SFelipe Pena 
4078588a458STyson Andre ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_head, 0, 0, 1)
4088588a458STyson Andre 	ZEND_ARG_INFO(0, object)
409bedd3238SFelipe Pena ZEND_END_ARG_INFO()
410bedd3238SFelipe Pena 
411bedd3238SFelipe Pena ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_get_body, 0, 0, 1)
412bedd3238SFelipe Pena 	ZEND_ARG_INFO(0, tidy)
413bedd3238SFelipe Pena ZEND_END_ARG_INFO()
41497353cdaSGabriel Caruso 
415dcd4b321STyson Andre ZEND_BEGIN_ARG_INFO_EX(arginfo_tidy_construct, 0, 0, 0)
41697353cdaSGabriel Caruso     ZEND_ARG_INFO(0, filename)
41797353cdaSGabriel Caruso     ZEND_ARG_INFO(0, config_file)
41897353cdaSGabriel Caruso     ZEND_ARG_INFO(0, encoding)
41997353cdaSGabriel Caruso     ZEND_ARG_INFO(0, use_include_path)
42097353cdaSGabriel Caruso ZEND_END_ARG_INFO()
421bedd3238SFelipe Pena /* }}} */
422bedd3238SFelipe Pena 
4236c810b0dSDmitry Stogov static const zend_function_entry tidy_functions[] = {
424bedd3238SFelipe Pena 	PHP_FE(tidy_getopt,             arginfo_tidy_getopt)
425bedd3238SFelipe Pena 	PHP_FE(tidy_parse_string,       arginfo_tidy_parse_string)
426bedd3238SFelipe Pena 	PHP_FE(tidy_parse_file,         arginfo_tidy_parse_file)
427bedd3238SFelipe Pena 	PHP_FE(tidy_get_output,         arginfo_tidy_get_output)
428b7a7b1a6SStanislav Malyshev 	PHP_FE(tidy_get_error_buffer,   arginfo_tidy_get_error_buffer)
429bedd3238SFelipe Pena 	PHP_FE(tidy_clean_repair,       arginfo_tidy_clean_repair)
430bedd3238SFelipe Pena 	PHP_FE(tidy_repair_string,	arginfo_tidy_repair_string)
431bedd3238SFelipe Pena 	PHP_FE(tidy_repair_file,	arginfo_tidy_repair_file)
432af49e58fSJani Taskinen 	PHP_FE(tidy_diagnose,           arginfo_tidy_diagnose)
433bedd3238SFelipe Pena 	PHP_FE(tidy_get_release,	arginfo_tidy_get_release)
434bedd3238SFelipe Pena 	PHP_FE(tidy_get_config,		arginfo_tidy_get_config)
435bedd3238SFelipe Pena 	PHP_FE(tidy_get_status,		arginfo_tidy_get_status)
436bedd3238SFelipe Pena 	PHP_FE(tidy_get_html_ver,	arginfo_tidy_get_html_ver)
437bedd3238SFelipe Pena 	PHP_FE(tidy_is_xhtml,		arginfo_tidy_is_xhtml)
438af49e58fSJani Taskinen 	PHP_FE(tidy_is_xml,		arginfo_tidy_is_xml)
439bedd3238SFelipe Pena 	PHP_FE(tidy_error_count,	arginfo_tidy_error_count)
440bedd3238SFelipe Pena 	PHP_FE(tidy_warning_count,	arginfo_tidy_warning_count)
441bedd3238SFelipe Pena 	PHP_FE(tidy_access_count,	arginfo_tidy_access_count)
442b7a7b1a6SStanislav Malyshev 	PHP_FE(tidy_config_count,	arginfo_tidy_config_count)
443335742c2SJohn Coggeshall #if HAVE_TIDYOPTGETDOC
444bedd3238SFelipe Pena 	PHP_FE(tidy_get_opt_doc,	arginfo_tidy_get_opt_doc)
445335742c2SJohn Coggeshall #endif
446bedd3238SFelipe Pena 	PHP_FE(tidy_get_root,		arginfo_tidy_get_root)
447bedd3238SFelipe Pena 	PHP_FE(tidy_get_head,		arginfo_tidy_get_head)
448bedd3238SFelipe Pena 	PHP_FE(tidy_get_html,		arginfo_tidy_get_html)
449bedd3238SFelipe Pena 	PHP_FE(tidy_get_body,		arginfo_tidy_get_body)
4504b30846bSFelipe Pena 	PHP_FE_END
4512fb97cdfSJohn Coggeshall };
4522fb97cdfSJohn Coggeshall 
4536c810b0dSDmitry Stogov static const zend_function_entry tidy_funcs_doc[] = {
4548588a458STyson Andre 	TIDY_METHOD_MAP(getOpt, tidy_getopt, arginfo_tidy_getopt_method)
4551bad08a8SJohn Coggeshall 	TIDY_METHOD_MAP(cleanRepair, tidy_clean_repair, NULL)
45697353cdaSGabriel Caruso 	TIDY_DOC_ME(parseFile, arginfo_tidy_parse_file)
45797353cdaSGabriel Caruso 	TIDY_DOC_ME(parseString, arginfo_tidy_parse_string)
45897353cdaSGabriel Caruso 	TIDY_METHOD_MAP(repairString, tidy_repair_string, arginfo_tidy_repair_string)
45997353cdaSGabriel Caruso 	TIDY_METHOD_MAP(repairFile, tidy_repair_file, arginfo_tidy_repair_file)
4603ab29352SJohn Coggeshall 	TIDY_METHOD_MAP(diagnose, tidy_diagnose, NULL)
4611bad08a8SJohn Coggeshall 	TIDY_METHOD_MAP(getRelease, tidy_get_release, NULL)
4621bad08a8SJohn Coggeshall 	TIDY_METHOD_MAP(getConfig, tidy_get_config, NULL)
4631bad08a8SJohn Coggeshall 	TIDY_METHOD_MAP(getStatus, tidy_get_status, NULL)
4641bad08a8SJohn Coggeshall 	TIDY_METHOD_MAP(getHtmlVer, tidy_get_html_ver, NULL)
465335742c2SJohn Coggeshall #if HAVE_TIDYOPTGETDOC
46697353cdaSGabriel Caruso 	TIDY_METHOD_MAP(getOptDoc, tidy_get_opt_doc, arginfo_tidy_get_opt_doc_method)
467335742c2SJohn Coggeshall #endif
4681bad08a8SJohn Coggeshall 	TIDY_METHOD_MAP(isXhtml, tidy_is_xhtml, NULL)
4691bad08a8SJohn Coggeshall 	TIDY_METHOD_MAP(isXml, tidy_is_xml, NULL)
4703ab29352SJohn Coggeshall 	TIDY_METHOD_MAP(root, tidy_get_root, NULL)
4713ab29352SJohn Coggeshall 	TIDY_METHOD_MAP(head, tidy_get_head, NULL)
4723ab29352SJohn Coggeshall 	TIDY_METHOD_MAP(html, tidy_get_html, NULL)
4733ab29352SJohn Coggeshall 	TIDY_METHOD_MAP(body, tidy_get_body, NULL)
47497353cdaSGabriel Caruso 	TIDY_DOC_ME(__construct, arginfo_tidy_construct)
4754b30846bSFelipe Pena 	PHP_FE_END
4763ab29352SJohn Coggeshall };
477a7b75e20SJohn Coggeshall 
4786c810b0dSDmitry Stogov static const zend_function_entry tidy_funcs_node[] = {
4791bad08a8SJohn Coggeshall 	TIDY_NODE_ME(hasChildren, NULL)
4801bad08a8SJohn Coggeshall 	TIDY_NODE_ME(hasSiblings, NULL)
4811bad08a8SJohn Coggeshall 	TIDY_NODE_ME(isComment, NULL)
4821bad08a8SJohn Coggeshall 	TIDY_NODE_ME(isHtml, NULL)
4831bad08a8SJohn Coggeshall 	TIDY_NODE_ME(isText, NULL)
4841bad08a8SJohn Coggeshall 	TIDY_NODE_ME(isJste, NULL)
4851bad08a8SJohn Coggeshall 	TIDY_NODE_ME(isAsp, NULL)
4861bad08a8SJohn Coggeshall 	TIDY_NODE_ME(isPhp, NULL)
4878b9f475aSNuno Lopes 	TIDY_NODE_ME(getParent, NULL)
4883554a274SPierrick Charron 	TIDY_NODE_PRIVATE_ME(__construct, NULL)
4894b30846bSFelipe Pena 	PHP_FE_END
490a7b75e20SJohn Coggeshall };
491a7b75e20SJohn Coggeshall 
492aa6593f9SNuno Lopes static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
4933ab29352SJohn Coggeshall 
4943ab29352SJohn Coggeshall static zend_object_handlers tidy_object_handlers_doc;
4953ab29352SJohn Coggeshall static zend_object_handlers tidy_object_handlers_node;
4962fb97cdfSJohn Coggeshall 
4976b778b0dSMarcus Boerger zend_module_entry tidy_module_entry = {
4982fb97cdfSJohn Coggeshall 	STANDARD_MODULE_HEADER,
499ea72aabfSJohn Coggeshall 	"tidy",
5002fb97cdfSJohn Coggeshall 	tidy_functions,
5012fb97cdfSJohn Coggeshall 	PHP_MINIT(tidy),
5024cb3d182SAntony Dovgal 	PHP_MSHUTDOWN(tidy),
50339e56b5fSJohn Coggeshall 	PHP_RINIT(tidy),
504*221345a0SChristoph M. Becker 	PHP_RSHUTDOWN(tidy),
5052fb97cdfSJohn Coggeshall 	PHP_MINFO(tidy),
506663074b6SAnatol Belski 	PHP_TIDY_VERSION,
5071dbaae27SDmitry Stogov 	PHP_MODULE_GLOBALS(tidy),
5081dbaae27SDmitry Stogov 	NULL,
5091dbaae27SDmitry Stogov 	NULL,
5101dbaae27SDmitry Stogov 	NULL,
5111dbaae27SDmitry Stogov 	STANDARD_MODULE_PROPERTIES_EX
5122fb97cdfSJohn Coggeshall };
5132fb97cdfSJohn Coggeshall 
/* Only emitted when COMPILE_DL_TIDY is defined; the TSRM cache definition is added for ZTS builds. */
#if defined(COMPILE_DL_TIDY)
# if defined(ZTS)
ZEND_TSRMLS_CACHE_DEFINE()
# endif
ZEND_GET_MODULE(tidy)
#endif
5202fb97cdfSJohn Coggeshall 
521c26991a9SNuno Lopes static void* TIDY_CALL php_tidy_malloc(size_t len)
5224cd101abSIlia Alshanetsky {
5233ab29352SJohn Coggeshall 	return emalloc(len);
5244cd101abSIlia Alshanetsky }
5254cd101abSIlia Alshanetsky 
php_tidy_realloc(void * buf,size_t len)526c26991a9SNuno Lopes static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
5274cd101abSIlia Alshanetsky {
5283ab29352SJohn Coggeshall 	return erealloc(buf, len);
5294cd101abSIlia Alshanetsky }
5304cd101abSIlia Alshanetsky 
php_tidy_free(void * buf)531c26991a9SNuno Lopes static void TIDY_CALL php_tidy_free(void *buf)
5324cd101abSIlia Alshanetsky {
5333ab29352SJohn Coggeshall 	efree(buf);
5344cd101abSIlia Alshanetsky }
535664268b0SIlia Alshanetsky 
php_tidy_panic(ctmbstr msg)536c26991a9SNuno Lopes static void TIDY_CALL php_tidy_panic(ctmbstr msg)
5374f6874b3SIlia Alshanetsky {
538bdeb220fSAnatol Belski 	php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
5394f6874b3SIlia Alshanetsky }
5404f6874b3SIlia Alshanetsky 
/*
 * Set a single libtidy option on `doc`.
 *
 *   doc      document whose configuration is changed
 *   optname  option name, looked up with tidyGetOptionByName()
 *   value    new value; read as a string for TidyString options and as an
 *            integer for TidyInteger and TidyBoolean options
 *
 * Returns SUCCESS when the matching libtidy setter reports success and
 * FAILURE otherwise. Unknown and read-only options raise a notice; an option
 * of any other type raises a warning.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	int status = FAILURE;
	TidyOption option = tidyGetOptionByName(doc, optname);

	if (option == NULL) {
		php_error_docref(NULL, E_NOTICE, "Unknown Tidy Configuration Option '%s'", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_NOTICE, "Attempting to set read-only option '%s'", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *scratch;
			zend_string *text = zval_get_tmp_string(value, &scratch);

			if (tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text))) {
				status = SUCCESS;
			}
			/* Released on both outcomes, after libtidy has been given the value. */
			zend_tmp_string_release(scratch);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				status = SUCCESS;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				status = SUCCESS;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return status;
}
588d900d737SJohn Coggeshall 
/* Shared worker for the "quick repair" entry points.
 *
 * Parses the PHP arguments (input, optional config, optional encoding,
 * optional use_include_path flag), feeds the input to a throw-away Tidy
 * document, runs tidyCleanAndRepair() on it and sets the return value to the
 * repaired markup, or to FALSE on failure.
 *
 * is_file: when non-zero the first argument is a path whose contents are
 *          loaded with php_tidy_file_to_mem(); otherwise the first argument
 *          is the markup itself.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	zend_bool use_include_path = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1;
	zval *config = NULL;

	if (is_file) {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "P|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		/* File mode: `data` is a string obtained from the helper and is
		 * released by this function before it returns. */
		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|zsb", &arg1, &config, &enc, &enc_len, &use_include_path) == FAILURE) {
			RETURN_FALSE;
		}
		/* String mode: `data` aliases the argument and is not released here. */
		data = arg1;
	}

	/* The length is narrowed to uint32_t when attached to the Tidy buffer below. */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		/* Release the file contents on this early exit as well, mirroring
		 * the release done at the end of the function. */
		if (is_file) {
			zend_string_release_ex(data, 0);
		}
		RETURN_FALSE;
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		/* NOTE(review): E_ERROR is expected to abort the request here; doc and
		 * errbuf are already released, so nothing below may run after it. */
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	if (config) {
		TIDY_APPLY_CONFIG_ZVAL(doc, config);
	}

	if (enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding '%s'", enc);
			/* NOTE(review): every branch of the parse/repair step below sets the
			 * return value again, so this FALSE is always overwritten and a bad
			 * encoding only produces the warning. Confirm whether an early exit
			 * was intended. */
			RETVAL_FALSE;
		}
	}

	if (data) {
		TidyBuffer buf;

		/* Attach (not copy) the input bytes to a Tidy buffer for parsing. */
		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				/* Drop the last byte of the saved output; guard the empty case. */
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
6774f6874b3SIlia Alshanetsky 
php_tidy_file_to_mem(char * filename,zend_bool use_include_path)678bdeb220fSAnatol Belski static zend_string *php_tidy_file_to_mem(char *filename, zend_bool use_include_path)
679a123efb6SJohn Coggeshall {
6803ab29352SJohn Coggeshall 	php_stream *stream;
6810f6c93d6SXinchen Hui 	zend_string *data = NULL;
6822fb97cdfSJohn Coggeshall 
683dd8e59daSKalle Sommer Nielsen 	if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
6843ab29352SJohn Coggeshall 		return NULL;
6853ab29352SJohn Coggeshall 	}
6860f6c93d6SXinchen Hui 	if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
6874bd22cf1SDmitry Stogov 		data = ZSTR_EMPTY_ALLOC();
6883ab29352SJohn Coggeshall 	}
6893ab29352SJohn Coggeshall 	php_stream_close(stream);
6902fb97cdfSJohn Coggeshall 
6913ab29352SJohn Coggeshall 	return data;
6924cd101abSIlia Alshanetsky }
6932fb97cdfSJohn Coggeshall 
/* free_storage handler: runs the standard object destructor, then drops this
 * object's reference on its PHPTidyDoc (if any). When the count reaches zero
 * the error buffer, the Tidy document and the PHPTidyDoc itself are freed.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *tidy_obj = php_tidy_fetch_object(object);
	PHPTidyDoc *tdoc;

	zend_object_std_dtor(&tidy_obj->std);

	tdoc = tidy_obj->ptdoc;
	if (!tdoc) {
		return;
	}

	tdoc->ref_count--;
	if (tdoc->ref_count > 0) {
		/* Still referenced by other objects. */
		return;
	}

	tidyBufFree(tdoc->errbuf);
	efree(tdoc->errbuf);
	tidyRelease(tdoc->doc);
	efree(tdoc);
}
7112fb97cdfSJohn Coggeshall 
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)712bdeb220fSAnatol Belski static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
713a123efb6SJohn Coggeshall {
7143ab29352SJohn Coggeshall 	PHPTidyObj *intern;
7152fb97cdfSJohn Coggeshall 
716b72b1a4eSNikita Popov 	intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
717bdeb220fSAnatol Belski 	zend_object_std_init(&intern->std, class_type);
718c5237d82SDmitry Stogov 	object_properties_init(&intern->std, class_type);
7194cd101abSIlia Alshanetsky 
7203ab29352SJohn Coggeshall 	switch(objtype) {
7213ab29352SJohn Coggeshall 		case is_node:
7223ab29352SJohn Coggeshall 			break;
72339e56b5fSJohn Coggeshall 
72439e56b5fSJohn Coggeshall 		case is_doc:
7253ab29352SJohn Coggeshall 			intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
7263ab29352SJohn Coggeshall 			intern->ptdoc->doc = tidyCreate();
7273ab29352SJohn Coggeshall 			intern->ptdoc->ref_count = 1;
728206f123bSAntony Dovgal 			intern->ptdoc->initialized = 0;
7293ab29352SJohn Coggeshall 			intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
7303ab29352SJohn Coggeshall 			tidyBufInit(intern->ptdoc->errbuf);
73139e56b5fSJohn Coggeshall 
73239e56b5fSJohn Coggeshall 			if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
733e4c5e856SIlia Alshanetsky 				tidyBufFree(intern->ptdoc->errbuf);
734e4c5e856SIlia Alshanetsky 				efree(intern->ptdoc->errbuf);
735e4c5e856SIlia Alshanetsky 				tidyRelease(intern->ptdoc->doc);
736e4c5e856SIlia Alshanetsky 				efree(intern->ptdoc);
737e4c5e856SIlia Alshanetsky 				efree(intern);
738bdeb220fSAnatol Belski 				php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
7393ab29352SJohn Coggeshall 			}
74039e56b5fSJohn Coggeshall 
7413ab29352SJohn Coggeshall 			tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
7423ab29352SJohn Coggeshall 			tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
74339e56b5fSJohn Coggeshall 
7443ab29352SJohn Coggeshall 			TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
74539e56b5fSJohn Coggeshall 
746bdeb220fSAnatol Belski 			tidy_add_default_properties(intern, is_doc);
7473ab29352SJohn Coggeshall 			break;
7484cd101abSIlia Alshanetsky 	}
74939e56b5fSJohn Coggeshall 
7500f6c93d6SXinchen Hui 	intern->std.handlers = handlers;
7510f6c93d6SXinchen Hui 
7520f6c93d6SXinchen Hui 	return &intern->std;
7532fb97cdfSJohn Coggeshall }
7542fb97cdfSJohn Coggeshall 
tidy_object_new_node(zend_class_entry * class_type)755bdeb220fSAnatol Belski static zend_object *tidy_object_new_node(zend_class_entry *class_type)
756a123efb6SJohn Coggeshall {
757bdeb220fSAnatol Belski 	return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
7583ab29352SJohn Coggeshall }
7594cd101abSIlia Alshanetsky 
tidy_object_new_doc(zend_class_entry * class_type)760bdeb220fSAnatol Belski static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
7613ab29352SJohn Coggeshall {
762bdeb220fSAnatol Belski 	return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
7633ab29352SJohn Coggeshall }
7644cd101abSIlia Alshanetsky 
/* Initializes `object` as a new instance of class `pce` and hands the same
 * zval pointer back to the caller. The result of object_init_ex() is not
 * inspected. */
static zval * tidy_instanciate(zend_class_entry *pce, zval *object)
{
	zval *instance = object;

	object_init_ex(instance, pce);

	return instance;
}
7702fb97cdfSJohn Coggeshall 
/* Cast handler for document objects.
 *
 * Integer/number casts yield 0, double casts yield 0, boolean casts yield
 * TRUE, and string casts yield the document as saved by tidySaveBuffer()
 * (minus its last byte, or the empty string when nothing was produced).
 * Any other target type is rejected with FAILURE.
 */
static int tidy_doc_cast_handler(zval *in, zval *out, int type)
{
	TidyBuffer saved;
	PHPTidyObj *tidy_obj;

	switch (type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			return SUCCESS;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			return SUCCESS;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			return SUCCESS;

		case IS_STRING:
			tidy_obj = Z_TIDY_P(in);
			tidyBufInit(&saved);
			tidySaveBuffer (tidy_obj->ptdoc->doc, &saved);
			if (!saved.size) {
				ZVAL_EMPTY_STRING(out);
			} else {
				ZVAL_STRINGL(out, (char *) saved.bp, saved.size-1);
			}
			tidyBufFree(&saved);
			return SUCCESS;

		default:
			break;
	}

	return FAILURE;
}
8082fb97cdfSJohn Coggeshall 
/* Cast handler for node objects.
 *
 * Integer/number casts yield 0, double casts yield 0, boolean casts yield
 * TRUE, and string casts yield the node text from tidyNodeGetText() minus
 * its last byte. A node without a document, or one whose text buffer comes
 * back empty, casts to the empty string. Any other target type is rejected
 * with FAILURE.
 */
static int tidy_node_cast_handler(zval *in, zval *out, int type)
{
	TidyBuffer buf;
	PHPTidyObj *obj;

	switch(type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			break;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			break;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			break;

		case IS_STRING:
			obj = Z_TIDY_P(in);
			tidyBufInit(&buf);
			if (obj->ptdoc) {
				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			}
			/* Only subtract the last byte when there is one: with an
			 * empty buffer, size-1 would wrap around. This matches the guard
			 * used by the document cast handler. */
			if (obj->ptdoc && buf.size) {
				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&buf);
			break;

		default:
			return FAILURE;
	}

	return SUCCESS;
}
8463ab29352SJohn Coggeshall 
/* Refreshes the "value" and "errorBuffer" entries in the object's property
 * table from the current Tidy document.
 *
 * "value" is set to the document as saved by tidySaveBuffer() and
 * "errorBuffer" to the contents of the document's error buffer, each minus
 * its last byte. A property is only written when its source buffer is
 * non-empty; the property table is built on demand before the first write.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer markup;
	TidyBuffer *errors;
	zval prop;

	tidyBufInit(&markup);
	tidySaveBuffer (obj->ptdoc->doc, &markup);

	if (markup.size != 0) {
		if (obj->std.properties == NULL) {
			rebuild_object_properties(&obj->std);
		}
		ZVAL_STRINGL(&prop, (char*)markup.bp, markup.size-1);
		zend_hash_str_update(obj->std.properties, "value", sizeof("value") - 1, &prop);
	}

	tidyBufFree(&markup);

	errors = obj->ptdoc->errbuf;
	if (errors->size == 0) {
		return;
	}

	if (obj->std.properties == NULL) {
		rebuild_object_properties(&obj->std);
	}
	ZVAL_STRINGL(&prop, (char*)errors->bp, errors->size-1);
	zend_hash_str_update(obj->std.properties, "errorBuffer", sizeof("errorBuffer") - 1, &prop);
}
87464e40422SFrank M. Kromann 
tidy_add_default_properties(PHPTidyObj * obj,tidy_obj_type type)875bdeb220fSAnatol Belski static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
8763ab29352SJohn Coggeshall {
87739e56b5fSJohn Coggeshall 
8783ab29352SJohn Coggeshall 	TidyBuffer buf;
8793ab29352SJohn Coggeshall 	TidyAttr	tempattr;
8803ab29352SJohn Coggeshall 	TidyNode	tempnode;
8810f6c93d6SXinchen Hui 	zval attribute, children, temp;
8823ab29352SJohn Coggeshall 	PHPTidyObj *newobj;
88339e56b5fSJohn Coggeshall 
8843ab29352SJohn Coggeshall 	switch(type) {
88539e56b5fSJohn Coggeshall 
8863ab29352SJohn Coggeshall 		case is_node:
88725893b64SDmitry Stogov 			if (!obj->std.properties) {
88825893b64SDmitry Stogov 				rebuild_object_properties(&obj->std);
88925893b64SDmitry Stogov 			}
890d27d7619SNuno Lopes 			tidyBufInit(&buf);
8913ab29352SJohn Coggeshall 			tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
89211d24c15SMichael Wallner 			ADD_PROPERTY_STRINGL(obj->std.properties, value, buf.bp, buf.size ? buf.size-1 : 0);
8933ab29352SJohn Coggeshall 			tidyBufFree(&buf);
8943ab29352SJohn Coggeshall 
89548a2b2a3SWez Furlong 			ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
89648a2b2a3SWez Furlong 			ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
897c590531cSJohn Coggeshall 			ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
89896b9fb07SNuno Lopes 			ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
89996b9fb07SNuno Lopes 			ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));
90039e56b5fSJohn Coggeshall 
9013ab29352SJohn Coggeshall 			switch(tidyNodeGetType(obj->node)) {
9023ab29352SJohn Coggeshall 				case TidyNode_Root:
9033ab29352SJohn Coggeshall 				case TidyNode_DocType:
9043ab29352SJohn Coggeshall 				case TidyNode_Text:
9053ab29352SJohn Coggeshall 				case TidyNode_Comment:
9063ab29352SJohn Coggeshall 					break;
907b7a7b1a6SStanislav Malyshev 
9083ab29352SJohn Coggeshall 				default:
90948a2b2a3SWez Furlong 					ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
9103ab29352SJohn Coggeshall 			}
91139e56b5fSJohn Coggeshall 
9123ab29352SJohn Coggeshall 			tempattr = tidyAttrFirst(obj->node);
91339e56b5fSJohn Coggeshall 
91439e56b5fSJohn Coggeshall 			if (tempattr) {
9153ab29352SJohn Coggeshall 				char *name, *val;
9160f6c93d6SXinchen Hui 				array_init(&attribute);
91739e56b5fSJohn Coggeshall 
9183ab29352SJohn Coggeshall 				do {
9193ab29352SJohn Coggeshall 					name = (char *)tidyAttrName(tempattr);
9203ab29352SJohn Coggeshall 					val = (char *)tidyAttrValue(tempattr);
92139e56b5fSJohn Coggeshall 					if (name && val) {
9220f6c93d6SXinchen Hui 						add_assoc_string(&attribute, name, val);
9233ab29352SJohn Coggeshall 					}
9243ab29352SJohn Coggeshall 				} while((tempattr = tidyAttrNext(tempattr)));
9253ab29352SJohn Coggeshall 			} else {
9260f6c93d6SXinchen Hui 				ZVAL_NULL(&attribute);
9273ab29352SJohn Coggeshall 			}
9280f6c93d6SXinchen Hui 			zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);
92939e56b5f