1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: John Coggeshall <john@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include "php.h"
22 #include "php_tidy.h"
23
24 #ifdef HAVE_TIDY
25
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40
41 #include "tidy_arginfo.h"
42
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47
48 /* {{{ ext/tidy macros */
/* If the TidyBuffer is non-empty, overwrite its last byte with a NUL. */
#define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)

/* Declares `object` and initialises it from getThis(). */
#define TIDY_SET_CONTEXT \
	zval *object = getThis();

/*
 * Declares `obj` and `object`, parses exactly one tidy_ce_doc object
 * argument (method-style, via getThis()), and leaves via RETURN_THROWS()
 * when parsing fails. On success `obj` is the PHPTidyObj of that object.
 * NOTE: expands to declarations plus statements and contains a hidden return.
 */
#define TIDY_FETCH_OBJECT	\
	PHPTidyObj *obj;	\
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) == FAILURE) {	\
		RETURN_THROWS(); \
	}	\
	obj = Z_TIDY_P(object);	\

/*
 * Same as TIDY_FETCH_OBJECT, but additionally throws an Error and returns
 * when the document's `initialized` flag is not set.
 */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}

/*
 * Declares `obj`, takes the object from getThis() and requires that the
 * call received no arguments (RETURN_THROWS() otherwise).
 */
#define TIDY_FETCH_ONLY_OBJECT	\
	PHPTidyObj *obj;	\
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() == FAILURE) {	\
		RETURN_THROWS(); \
	}	\
	obj = Z_TIDY_P(object);	\

/*
 * Applies user configuration to a TidyDoc.
 *  - _val_ht set:  every string-keyed entry is applied as an option.
 *  - _val_str set: the string is treated as a configuration file path; it is
 *    checked against open_basedir and then loaded with tidyLoadConfig().
 *    A result of -1 raises a warning, a result of 1 raises a notice.
 * NOTE(review): the open_basedir check expands to RETURN_FALSE, i.e. the
 * enclosing function returns immediately without running any of its own
 * cleanup code.
 */
#define TIDY_APPLY_CONFIG(_doc, _val_str, _val_ht) \
	if (_val_ht) { \
		_php_tidy_apply_config_array(_doc, _val_ht); \
	} else if (_val_str) { \
		TIDY_OPEN_BASE_DIR_CHECK(ZSTR_VAL(_val_str)); \
		switch (tidyLoadConfig(_doc, ZSTR_VAL(_val_str))) { \
			case -1: \
				php_error_docref(NULL, E_WARNING, "Could not load configuration file \"%s\"", ZSTR_VAL(_val_str)); \
				break; \
			case 1: \
				php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file \"%s\"", ZSTR_VAL(_val_str)); \
				break; \
		} \
	}

/*
 * Registers an internal class: installs tidy_object_new_<name> as the
 * create_object hook, ORs __flags into ce_flags, copies the standard object
 * handlers into tidy_object_handlers_<name> and clears clone_obj.
 */
#define REGISTER_TIDY_CLASS(classname, name, parent, __flags) \
	{ \
		zend_class_entry ce; \
		INIT_CLASS_ENTRY(ce, # classname, class_ ## classname ## _methods); \
		ce.create_object = tidy_object_new_ ## name; \
		tidy_ce_ ## name = zend_register_internal_class_ex(&ce, parent); \
		tidy_ce_ ## name->ce_flags |= __flags;  \
		memcpy(&tidy_object_handlers_ ## name, &std_object_handlers, sizeof(zend_object_handlers)); \
		tidy_object_handlers_ ## name.clone_obj = NULL; \
	}

/* Register TIDY_TAG_<tag> / TIDY_NODETYPE_<name> as persistent long constants. */
#define TIDY_TAG_CONST(tag) REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)
#define TIDY_NODE_CONST(name, type) REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)

/* Fallback boolean constants when the environment does not provide them. */
#ifndef TRUE
#define TRUE 1
#endif

#ifndef FALSE
#define FALSE 0
#endif

/*
 * ADD_PROPERTY_* helpers: build a temporary zval and store it in the hash
 * table `_table` under the stringified `_key`. The string variants store an
 * empty string when `_string` is NULL.
 */
#define ADD_PROPERTY_STRING(_table, _key, _string) \
	{ \
		zval tmp; \
		if (_string) { \
			ZVAL_STRING(&tmp, (char *)_string); \
		} else { \
			ZVAL_EMPTY_STRING(&tmp); \
		} \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

#define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
	{ \
		zval tmp; \
		if (_string) { \
			ZVAL_STRINGL(&tmp, (char *)_string, _len); \
		} else { \
			ZVAL_EMPTY_STRING(&tmp); \
		} \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

#define ADD_PROPERTY_LONG(_table, _key, _long) \
	{ \
		zval tmp; \
		ZVAL_LONG(&tmp, _long); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

#define ADD_PROPERTY_NULL(_table, _key) \
	{ \
		zval tmp; \
		ZVAL_NULL(&tmp); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

#define ADD_PROPERTY_BOOL(_table, _key, _bool) \
	{ \
		zval tmp; \
		ZVAL_BOOL(&tmp, _bool); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/*
 * Makes the enclosing function RETURN_FALSE when php_check_open_basedir()
 * reports a non-zero result for `filename`. Contains a hidden return.
 */
#define TIDY_OPEN_BASE_DIR_CHECK(filename) \
if (php_check_open_basedir(filename)) { \
	RETURN_FALSE; \
} \

/*
 * Loads the file named by the tidy.default_config INI setting into `_doc`
 * when that setting is a non-empty string; a negative tidyLoadConfig()
 * result raises a warning.
 */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		if (tidyLoadConfig(_doc, TG(default_config)) < 0) { \
			php_error_docref(NULL, E_WARNING, "Unable to load Tidy configuration file at \"%s\"", TG(default_config)); \
		} \
	}
168 /* }}} */
169
170 /* {{{ ext/tidy structs */
typedef struct _PHPTidyDoc PHPTidyDoc;
typedef struct _PHPTidyObj PHPTidyObj;

/* Which kind of PHP object a PHPTidyObj backs. */
typedef enum {
	is_node,
	is_doc
} tidy_obj_type;

/* Well-known nodes that php_tidy_create_node() can fetch from a document. */
typedef enum {
	is_root_node,
	is_html_node,
	is_head_node,
	is_body_node
} tidy_base_nodetypes;

/*
 * A libtidy document together with its error buffer. The structure is shared
 * between the document object and every node object created from it;
 * ref_count tracks how many PHPTidyObj instances point at it.
 */
struct _PHPTidyDoc {
	TidyDoc			doc;			/* libtidy document handle */
	TidyBuffer		*errbuf;		/* buffer registered via tidySetErrorBuffer() */
	unsigned int	ref_count;		/* number of PHPTidyObj referencing this struct */
	unsigned int    initialized:1;	/* set by php_tidy_parse_string() */
};

/*
 * Per-object storage. `std` must stay the last member: the enclosing
 * structure is recovered from a zend_object pointer by subtracting the
 * offset of `std` (see php_tidy_fetch_object()).
 */
struct _PHPTidyObj {
	TidyNode		node;		/* node this object represents (node objects) */
	tidy_obj_type	type;		/* is_node or is_doc */
	PHPTidyDoc		*ptdoc;		/* shared document, may be NULL for a bare node object */
	zend_object		std;
};
199
php_tidy_fetch_object(zend_object * obj)200 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
201 return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
202 }
203
204 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
205 /* }}} */
206
207 /* {{{ ext/tidy prototypes */
/* Helpers: file loading, object lifecycle, casting and property population. */
static zend_string *php_tidy_file_to_mem(char *, zend_bool);
static void tidy_object_free_storage(zend_object *);
static zend_object *tidy_object_new_node(zend_class_entry *);
static zend_object *tidy_object_new_doc(zend_class_entry *);
static zval * tidy_instanciate(zend_class_entry *, zval *);
static int tidy_doc_cast_handler(zend_object *, zval *, int);
static int tidy_node_cast_handler(zend_object *, zval *, int);
static void tidy_doc_update_properties(PHPTidyObj *);
static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type);
static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
/* Configuration helpers. */
static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
/* Constant registration (definitions are not in this part of the file). */
static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
static void _php_tidy_register_tags(INIT_FUNC_ARGS);
/* tidy.clean_output INI handler and the ob_tidyhandler output handler. */
static PHP_INI_MH(php_tidy_set_clean_output);
static void php_tidy_clean_output_start(const char *name, size_t name_len);
static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
static int php_tidy_output_handler(void **nothing, php_output_context *output_context);

/* Module lifecycle hooks. */
static PHP_MINIT_FUNCTION(tidy);
static PHP_MSHUTDOWN_FUNCTION(tidy);
static PHP_RINIT_FUNCTION(tidy);
static PHP_RSHUTDOWN_FUNCTION(tidy);
static PHP_MINFO_FUNCTION(tidy);

ZEND_DECLARE_MODULE_GLOBALS(tidy)

/*
 * INI settings:
 *  - tidy.default_config: system-level string, stored in TG(default_config).
 *  - tidy.clean_output:   user-changeable, validated by
 *                         php_tidy_set_clean_output().
 */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
STD_PHP_INI_ENTRY("tidy.clean_output",		"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
PHP_INI_END()

/* Class entries for the `tidy` (doc) and `tidyNode` (node) classes. */
static zend_class_entry *tidy_ce_doc, *tidy_ce_node;

/* Object handler tables, filled in by REGISTER_TIDY_CLASS and PHP_MINIT. */
static zend_object_handlers tidy_object_handlers_doc;
static zend_object_handlers tidy_object_handlers_node;

/* Module descriptor wiring the function table and lifecycle hooks together. */
zend_module_entry tidy_module_entry = {
	STANDARD_MODULE_HEADER,
	"tidy",
	ext_functions,
	PHP_MINIT(tidy),
	PHP_MSHUTDOWN(tidy),
	PHP_RINIT(tidy),
	PHP_RSHUTDOWN(tidy),
	PHP_MINFO(tidy),
	PHP_TIDY_VERSION,
	PHP_MODULE_GLOBALS(tidy),
	NULL,
	NULL,
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};

/* Only emitted when the extension is built as a loadable module. */
#ifdef COMPILE_DL_TIDY
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(tidy)
#endif
269
/* Allocation hook installed with tidySetMallocCall(); forwards to emalloc(). */
static void* TIDY_CALL php_tidy_malloc(size_t len)
{
	return emalloc(len);
}
274
/* Reallocation hook installed with tidySetReallocCall(); forwards to erealloc(). */
static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
{
	return erealloc(buf, len);
}
279
/* Deallocation hook installed with tidySetFreeCall(); forwards to efree(). */
static void TIDY_CALL php_tidy_free(void *buf)
{
	efree(buf);
}
284
/*
 * Panic hook installed with tidySetPanicCall(); reports the libtidy message
 * as an E_ERROR.
 */
static void TIDY_CALL php_tidy_panic(ctmbstr msg)
{
	php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
}
289
/*
 * Set a single libtidy option on `doc`.
 *
 * optname: option name as understood by tidyGetOptionByName().
 * value:   PHP value; converted to string or integer depending on the
 *          option's declared type (booleans go through the integer path).
 *
 * Returns SUCCESS when libtidy accepted the value, FAILURE otherwise.
 * Unknown options, read-only options and options of an unrecognised type
 * each raise an E_WARNING.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	TidyOption option = tidyGetOptionByName(doc, optname);
	int result = FAILURE;

	if (!option) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *tmp_str;
			zend_string *text = zval_get_tmp_string(value, &tmp_str);

			if (tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text))) {
				result = SUCCESS;
			}
			/* The temporary string is released on both outcomes. */
			zend_tmp_string_release(tmp_str);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				result = SUCCESS;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				result = SUCCESS;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return result;
}
337
/*
 * Shared implementation of tidy_repair_string() and tidy_repair_file().
 *
 * is_file: when non-zero the first argument is a path whose contents are
 *          loaded into memory (optionally via the include path); otherwise
 *          the first argument is the markup itself.
 *
 * PHP arguments: input, optional config (array of options, or a string
 * naming a configuration file, or null), optional encoding (string or null)
 * and - for files only - optional use_include_path.
 *
 * Sets the return value to the repaired markup, or false when the file
 * cannot be read, the configuration file is outside open_basedir, parsing
 * fails or clean-and-repair fails. Throws a ValueError when the input is
 * too long for libtidy's 32-bit length.
 *
 * Every exit path after the libtidy document has been created goes through
 * the `cleanup` label so that the document, the error buffer and (for
 * files) the loaded contents are always released.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;

	if (is_file) {
		zend_bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			/* null is accepted, matching tidy_parse_string()/tidy_parse_file() */
			Z_PARAM_STRING_OR_NULL(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING_OR_NULL(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		data = arg1;
	}

	/* libtidy buffers take a 32-bit length. */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (is_file) {
			/* we own the loaded contents; do not leak them on this path */
			zend_string_release_ex(data, 0);
		}
		zend_argument_value_error(1, "is too long");
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		if (is_file) {
			zend_string_release_ex(data, 0);
		}
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/*
	 * Same logic as TIDY_APPLY_CONFIG, written out so that an open_basedir
	 * rejection releases our resources instead of returning directly.
	 */
	if (config_ht) {
		_php_tidy_apply_config_array(doc, config_ht);
	} else if (config_str) {
		if (php_check_open_basedir(ZSTR_VAL(config_str))) {
			RETVAL_FALSE;
			goto cleanup;
		}
		switch (tidyLoadConfig(doc, ZSTR_VAL(config_str))) {
			case -1:
				php_error_docref(NULL, E_WARNING, "Could not load configuration file \"%s\"", ZSTR_VAL(config_str));
				break;
			case 1:
				php_error_docref(NULL, E_NOTICE, "There were errors while parsing the configuration file \"%s\"", ZSTR_VAL(config_str));
				break;
			default:
				break;
		}
	}

	if (enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
			/*
			 * NOTE(review): processing deliberately continues as before, so
			 * this value is overwritten by the parse result below.
			 */
			RETVAL_FALSE;
		}
	}

	{
		TidyBuffer buf;

		tidyBufInit(&buf);
		/* buf only borrows the string's memory; it must not be freed */
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else if (tidyCleanAndRepair(doc) >= 0) {
			TidyBuffer output;

			tidyBufInit(&output);
			tidySaveBuffer(doc, &output);
			FIX_BUFFER(&output);
			RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
			tidyBufFree(&output);
		} else {
			RETVAL_FALSE;
		}
	}

cleanup:
	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
434
php_tidy_file_to_mem(char * filename,zend_bool use_include_path)435 static zend_string *php_tidy_file_to_mem(char *filename, zend_bool use_include_path)
436 {
437 php_stream *stream;
438 zend_string *data = NULL;
439
440 if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
441 return NULL;
442 }
443 if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
444 data = ZSTR_EMPTY_ALLOC();
445 }
446 php_stream_close(stream);
447
448 return data;
449 }
450
/*
 * free_obj handler for both tidy classes. Destroys the standard object part
 * and drops this object's reference on the shared PHPTidyDoc; once the
 * count reaches zero the error buffer, the libtidy document and the
 * PHPTidyDoc itself are released.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&intern->std);

	shared = intern->ptdoc;
	if (!shared) {
		return;
	}

	shared->ref_count--;
	if (shared->ref_count > 0) {
		/* other objects still use this document */
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
468
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)469 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
470 {
471 PHPTidyObj *intern;
472
473 intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
474 zend_object_std_init(&intern->std, class_type);
475 object_properties_init(&intern->std, class_type);
476
477 switch(objtype) {
478 case is_node:
479 break;
480
481 case is_doc:
482 intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
483 intern->ptdoc->doc = tidyCreate();
484 intern->ptdoc->ref_count = 1;
485 intern->ptdoc->initialized = 0;
486 intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
487 tidyBufInit(intern->ptdoc->errbuf);
488
489 if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
490 tidyBufFree(intern->ptdoc->errbuf);
491 efree(intern->ptdoc->errbuf);
492 tidyRelease(intern->ptdoc->doc);
493 efree(intern->ptdoc);
494 efree(intern);
495 php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
496 }
497
498 tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
499 tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
500
501 TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
502
503 tidy_add_default_properties(intern, is_doc);
504 break;
505 }
506
507 intern->std.handlers = handlers;
508
509 return &intern->std;
510 }
511
/* create_object hook for the node class: a node-type object with the node handlers. */
static zend_object *tidy_object_new_node(zend_class_entry *class_type)
{
	return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
}
516
/* create_object hook for the doc class: a doc-type object with the doc handlers. */
static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
{
	return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
}
521
/*
 * Initialise `object` as a new instance of class `pce` and return the same
 * zval pointer. The result of object_init_ex() is not checked.
 */
static zval * tidy_instanciate(zend_class_entry *pce, zval *object)
{
	object_init_ex(object, pce);
	return object;
}
527
/*
 * cast_object handler for tidy documents.
 *
 * Integer/number casts give 0, double gives 0.0, bool gives true and string
 * gives the document as saved by tidySaveBuffer() (empty string when the
 * saved buffer is empty). Any other target type returns FAILURE.
 */
static int tidy_doc_cast_handler(zend_object *in, zval *out, int type)
{
	switch (type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			return SUCCESS;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			return SUCCESS;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			return SUCCESS;

		case IS_STRING: {
			PHPTidyObj *self = php_tidy_fetch_object(in);
			TidyBuffer saved;

			tidyBufInit(&saved);
			tidySaveBuffer(self->ptdoc->doc, &saved);
			if (saved.size) {
				/* the final byte of the saved buffer is not part of the string */
				ZVAL_STRINGL(out, (char *) saved.bp, saved.size-1);
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&saved);
			return SUCCESS;
		}

		default:
			return FAILURE;
	}
}
565
/*
 * cast_object handler for tidy nodes.
 *
 * Integer/number casts give 0, double gives 0.0, bool gives true and string
 * gives the node text from tidyNodeGetText(). Any other target type returns
 * FAILURE.
 *
 * The string cast yields an empty string both when the node has no document
 * and when libtidy produced no text. The length is `size - 1` on an unsigned
 * size, so an empty buffer must never reach that computation.
 */
static int tidy_node_cast_handler(zend_object *in, zval *out, int type)
{
	TidyBuffer buf;
	PHPTidyObj *obj;

	switch(type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			break;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			break;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			break;

		case IS_STRING:
			obj = php_tidy_fetch_object(in);
			tidyBufInit(&buf);
			if (obj->ptdoc) {
				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			}
			if (buf.size) {
				/* drop the final byte, as the other buffer readers in this file do */
				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&buf);
			break;

		default:
			return FAILURE;
	}

	return SUCCESS;
}
603
/*
 * Refresh the "value" and "errorBuffer" entries in the object's property
 * table from the current libtidy state. Each entry is only written when the
 * corresponding buffer is non-empty; the property table is built on demand.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer saved;
	TidyBuffer *errors = obj->ptdoc->errbuf;
	zval entry;

	tidyBufInit(&saved);
	tidySaveBuffer(obj->ptdoc->doc, &saved);

	if (saved.size) {
		if (!obj->std.properties) {
			rebuild_object_properties(&obj->std);
		}
		ZVAL_STRINGL(&entry, (char*)saved.bp, saved.size-1);
		zend_hash_str_update(obj->std.properties, ZEND_STRL("value"), &entry);
	}

	tidyBufFree(&saved);

	if (errors->size) {
		if (!obj->std.properties) {
			rebuild_object_properties(&obj->std);
		}
		ZVAL_STRINGL(&entry, (char*)errors->bp, errors->size-1);
		zend_hash_str_update(obj->std.properties, ZEND_STRL("errorBuffer"), &entry);
	}
}
631
/*
 * Populate the property table of a freshly created tidy object.
 *
 * is_node: adds value, name, type, line, column, proprietary, (for most node
 *          types) id, attribute and child. Child nodes are instantiated
 *          eagerly and recursively, so the whole subtree below obj->node is
 *          materialised. Requires obj->ptdoc and obj->node to be set.
 * is_doc:  adds errorBuffer and value, both initialised to null.
 */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{

	TidyBuffer buf;
	TidyAttr	tempattr;
	TidyNode	tempnode;
	zval attribute, children, temp;
	PHPTidyObj *newobj;

	switch(type) {

		case is_node:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			/* "value": node text; an empty buffer yields length 0 */
			tidyBufInit(&buf);
			tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			ADD_PROPERTY_STRINGL(obj->std.properties, value, buf.bp, buf.size ? buf.size-1 : 0);
			tidyBufFree(&buf);

			ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
			ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
			ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

			/* "id" is skipped for root, doctype, text and comment nodes */
			switch(tidyNodeGetType(obj->node)) {
				case TidyNode_Root:
				case TidyNode_DocType:
				case TidyNode_Text:
				case TidyNode_Comment:
					break;

				default:
					ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
			}

			/* "attribute": name => value array, or null when there are none */
			tempattr = tidyAttrFirst(obj->node);

			if (tempattr) {
				char *name, *val;
				array_init(&attribute);

				do {
					name = (char *)tidyAttrName(tempattr);
					val = (char *)tidyAttrValue(tempattr);
					/* attributes lacking a name or a value are not exported */
					if (name && val) {
						add_assoc_string(&attribute, name, val);
					}
				} while((tempattr = tidyAttrNext(tempattr)));
			} else {
				ZVAL_NULL(&attribute);
			}
			zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);

			/* "child": list of node objects, or null when there are none */
			tempnode = tidyGetChild(obj->node);

			if (tempnode) {
				array_init(&children);
				do {
					tidy_instanciate(tidy_ce_node, &temp);
					newobj = Z_TIDY_P(&temp);
					newobj->node = tempnode;
					newobj->type = is_node;
					/* each child shares the parent's document and holds a reference on it */
					newobj->ptdoc = obj->ptdoc;
					newobj->ptdoc->ref_count++;

					tidy_add_default_properties(newobj, is_node);
					add_next_index_zval(&children, &temp);

				} while((tempnode = tidyGetNext(tempnode)));

			} else {
				ZVAL_NULL(&children);
			}

			zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &children);

			break;

		case is_doc:
			if (!obj->std.properties) {
				rebuild_object_properties(&obj->std);
			}
			ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
			ADD_PROPERTY_NULL(obj->std.properties, value);
			break;
	}
}
721
/*
 * Fetch the current value of option `opt` from the document in `ptdoc`.
 *
 * type: out parameter, always set to the option's libtidy type.
 *
 * The return value must be interpreted according to *type:
 *   TidyString  - a zend_string * (empty string when libtidy has no value)
 *   TidyInteger - the integer value carried in the pointer
 *   TidyBoolean - the boolean value carried in the pointer
 * Any other type yields NULL.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	*type = tidyOptGetType(opt);

	if (*type == TidyString) {
		char *text = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));

		if (!text) {
			return (void *) ZSTR_EMPTY_ALLOC();
		}
		return (void *) zend_string_init(text, strlen(text), 0);
	}

	if (*type == TidyInteger) {
		return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
	}

	if (*type == TidyBoolean) {
		return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
749
/*
 * Shared implementation for the functions that return one of a document's
 * well-known nodes (root, html, head, body).
 *
 * Sets the return value to a new node object sharing the document of the
 * tidy object passed by the caller, or to null when node_type is not
 * recognised or libtidy returns no such node.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *created;
	TidyNode found;
	TIDY_FETCH_OBJECT;

	if (node_type == is_root_node) {
		found = tidyGetRoot(obj->ptdoc->doc);
	} else if (node_type == is_html_node) {
		found = tidyGetHtml(obj->ptdoc->doc);
	} else if (node_type == is_head_node) {
		found = tidyGetHead(obj->ptdoc->doc);
	} else if (node_type == is_body_node) {
		found = tidyGetBody(obj->ptdoc->doc);
	} else {
		RETURN_NULL();
	}

	if (!found) {
		RETURN_NULL();
	}

	tidy_instanciate(tidy_ce_node, return_value);
	created = Z_TIDY_P(return_value);
	created->node  = found;
	created->type  = is_node;
	created->ptdoc = obj->ptdoc;
	/* the new node keeps the shared document alive */
	created->ptdoc->ref_count++;

	tidy_add_default_properties(created, is_node);
}
791
/*
 * Apply every string-keyed entry of `ht_options` to `doc` as a tidy option.
 * Entries with integer keys are ignored. Failures of individual options are
 * not propagated: the function always returns SUCCESS.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zend_string *key;
	zval *entry;

	ZEND_HASH_FOREACH_STR_KEY_VAL(ht_options, key, entry) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), entry);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
806
/*
 * Parse `len` bytes of markup at `string` into the document owned by `obj`.
 *
 * enc: optional character encoding name; NULL leaves the encoding untouched.
 *
 * Returns FAILURE (with an E_WARNING) when the encoding is rejected or
 * libtidy reports a parse error; otherwise refreshes the object's
 * properties and returns SUCCESS. The document is flagged as initialised as
 * soon as the encoding step has passed, even when parsing then fails.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	TidyBuffer input;

	if (enc && tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	obj->ptdoc->initialized = 1;

	tidyBufInit(&input);
	/* the buffer borrows the caller's memory */
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(obj->ptdoc->doc, &input) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
		return FAILURE;
	}

	tidy_doc_update_properties(obj);
	return SUCCESS;
}
830
PHP_MINIT_FUNCTION(tidy)831 static PHP_MINIT_FUNCTION(tidy)
832 {
833 tidySetMallocCall(php_tidy_malloc);
834 tidySetReallocCall(php_tidy_realloc);
835 tidySetFreeCall(php_tidy_free);
836 tidySetPanicCall(php_tidy_panic);
837
838 REGISTER_INI_ENTRIES();
839 REGISTER_TIDY_CLASS(tidy, doc, NULL, 0);
840 REGISTER_TIDY_CLASS(tidyNode, node, NULL, ZEND_ACC_FINAL);
841
842 tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
843 tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
844
845 tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
846 tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
847
848 _php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU);
849 _php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU);
850
851 php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
852
853 return SUCCESS;
854 }
855
/*
 * Request startup: starts the ob_tidyhandler output handler, which
 * php_tidy_clean_output_start() only does when tidy.clean_output is enabled.
 */
static PHP_RINIT_FUNCTION(tidy)
{
#if defined(COMPILE_DL_TIDY) && defined(ZTS)
	ZEND_TSRMLS_CACHE_UPDATE();
#endif

	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));

	return SUCCESS;
}
866
/*
 * Request shutdown: resets TG(clean_output) to the original INI value.
 * php_tidy_output_handler_init() may have forced the flag to 1 during the
 * request.
 */
static PHP_RSHUTDOWN_FUNCTION(tidy)
{
	TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");

	return SUCCESS;
}
873
/* Module shutdown: unregisters the INI entries registered in MINIT. */
static PHP_MSHUTDOWN_FUNCTION(tidy)
{
	UNREGISTER_INI_ENTRIES();
	return SUCCESS;
}
879
/*
 * phpinfo() section: reports that tidy is enabled, the library version
 * (libTidy or libtidyp, depending on which headers were found at build
 * time), the release date when available, and the INI entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");
#ifdef HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
#elif defined(HAVE_TIDYP_H)
	php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
#endif
#ifdef HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
#endif
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
896
PHP_INI_MH(php_tidy_set_clean_output)897 static PHP_INI_MH(php_tidy_set_clean_output)
898 {
899 int status;
900 zend_bool value;
901
902 if (ZSTR_LEN(new_value)==2 && strcasecmp("on", ZSTR_VAL(new_value))==0) {
903 value = (zend_bool) 1;
904 } else if (ZSTR_LEN(new_value)==3 && strcasecmp("yes", ZSTR_VAL(new_value))==0) {
905 value = (zend_bool) 1;
906 } else if (ZSTR_LEN(new_value)==4 && strcasecmp("true", ZSTR_VAL(new_value))==0) {
907 value = (zend_bool) 1;
908 } else {
909 value = (zend_bool) atoi(ZSTR_VAL(new_value));
910 }
911
912 if (stage == PHP_INI_STAGE_RUNTIME) {
913 status = php_output_get_status();
914
915 if (value && (status & PHP_OUTPUT_WRITTEN)) {
916 php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
917 return FAILURE;
918 }
919 if (status & PHP_OUTPUT_SENT) {
920 php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
921 return FAILURE;
922 }
923 }
924
925 status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
926
927 if (stage == PHP_INI_STAGE_RUNTIME && value) {
928 if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
929 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
930 }
931 }
932
933 return status;
934 }
935
936 /*
937 * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
938 */
939
php_tidy_clean_output_start(const char * name,size_t name_len)940 static void php_tidy_clean_output_start(const char *name, size_t name_len)
941 {
942 php_output_handler *h;
943
944 if (TG(clean_output) && (h = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS))) {
945 php_output_handler_start(h);
946 }
947 }
948
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)949 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
950 {
951 if (chunk_size) {
952 php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
953 return NULL;
954 }
955 if (!TG(clean_output)) {
956 TG(clean_output) = 1;
957 }
958 return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
959 }
960
/*
 * Output handler behind ob_tidyhandler / tidy.clean_output.
 *
 * Only acts when clean_output is enabled and the invocation carries both
 * the START and FINAL flags, i.e. when the whole output arrives in a single
 * call. The input is parsed, cleaned and repaired, and the saved document
 * becomes the handler's output.
 *
 * Returns SUCCESS when repaired output was produced, FAILURE otherwise
 * (including input longer than libtidy's 32-bit length, which raises an
 * E_WARNING).
 *
 * The length check runs before any libtidy resource is created so that the
 * rejection path has nothing to release.
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (!TG(clean_output)
	 || !(output_context->op & PHP_OUTPUT_HANDLER_START)
	 || !(output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		return status;
	}

	if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
		php_error_docref(NULL, E_WARNING, "Input string is too long");
		return status;
	}

	doc = tidyCreate();
	tidyBufInit(&errbuf);

	if (0 == tidySetErrorBuffer(doc, &errbuf)) {
		tidyOptSetBool(doc, TidyForceOutput, yes);
		tidyOptSetBool(doc, TidyMark, no);

		TIDY_SET_DEFAULT_CONFIG(doc);

		tidyBufInit(&inbuf);
		/* inbuf only borrows the output context's data */
		tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);

		if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
			tidyBufInit(&outbuf);
			tidySaveBuffer(doc, &outbuf);
			FIX_BUFFER(&outbuf);
			/* out.free = 1 hands outbuf's memory to the output layer, so it is not freed here */
			output_context->out.data = (char *) outbuf.bp;
			output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
			output_context->out.free = 1;
			status = SUCCESS;
		}
	}

	tidyRelease(doc);
	tidyBufFree(&errbuf);

	return status;
}
1002
1003 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1004 PHP_FUNCTION(tidy_parse_string)
1005 {
1006 char *enc = NULL;
1007 size_t enc_len = 0;
1008 zend_string *input, *options_str = NULL;
1009 HashTable *options_ht = NULL;
1010 PHPTidyObj *obj;
1011
1012 ZEND_PARSE_PARAMETERS_START(1, 3)
1013 Z_PARAM_STR(input)
1014 Z_PARAM_OPTIONAL
1015 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1016 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1017 ZEND_PARSE_PARAMETERS_END();
1018
1019 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1020 zend_argument_value_error(1, "is too long");
1021 RETURN_THROWS();
1022 }
1023
1024 tidy_instanciate(tidy_ce_doc, return_value);
1025 obj = Z_TIDY_P(return_value);
1026
1027 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1028
1029 if (php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1030 zval_ptr_dtor(return_value);
1031 RETURN_FALSE;
1032 }
1033 }
1034 /* }}} */
1035
/* {{{ Return warnings and errors which occurred while parsing the specified document */
PHP_FUNCTION(tidy_get_error_buffer)1037 PHP_FUNCTION(tidy_get_error_buffer)
1038 {
1039 TIDY_FETCH_OBJECT;
1040
1041 if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1042 RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1043 } else {
1044 RETURN_FALSE;
1045 }
1046 }
1047 /* }}} */
1048
1049 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1050 PHP_FUNCTION(tidy_get_output)
1051 {
1052 TidyBuffer output;
1053 TIDY_FETCH_OBJECT;
1054
1055 tidyBufInit(&output);
1056 tidySaveBuffer(obj->ptdoc->doc, &output);
1057 FIX_BUFFER(&output);
1058 RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1059 tidyBufFree(&output);
1060 }
1061 /* }}} */
1062
1063 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1064 PHP_FUNCTION(tidy_parse_file)
1065 {
1066 char *enc = NULL;
1067 size_t enc_len = 0;
1068 zend_bool use_include_path = 0;
1069 zend_string *inputfile, *contents, *options_str = NULL;
1070 HashTable *options_ht = NULL;
1071
1072 PHPTidyObj *obj;
1073
1074 ZEND_PARSE_PARAMETERS_START(1, 4)
1075 Z_PARAM_PATH_STR(inputfile)
1076 Z_PARAM_OPTIONAL
1077 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1078 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1079 Z_PARAM_BOOL(use_include_path)
1080 ZEND_PARSE_PARAMETERS_END();
1081
1082 tidy_instanciate(tidy_ce_doc, return_value);
1083 obj = Z_TIDY_P(return_value);
1084
1085 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1086 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1087 RETURN_FALSE;
1088 }
1089
1090 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1091 zend_value_error("Input string is too long");
1092 RETURN_THROWS();
1093 }
1094
1095 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1096
1097 if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1098 zval_ptr_dtor(return_value);
1099 RETVAL_FALSE;
1100 }
1101
1102 zend_string_release_ex(contents, 0);
1103 }
1104 /* }}} */
1105
1106 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1107 PHP_FUNCTION(tidy_clean_repair)
1108 {
1109 TIDY_FETCH_OBJECT;
1110
1111 if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1112 tidy_doc_update_properties(obj);
1113 RETURN_TRUE;
1114 }
1115
1116 RETURN_FALSE;
1117 }
1118 /* }}} */
1119
1120 /* {{{ Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1121 PHP_FUNCTION(tidy_repair_string)
1122 {
1123 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, FALSE);
1124 }
1125 /* }}} */
1126
1127 /* {{{ Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1128 PHP_FUNCTION(tidy_repair_file)
1129 {
1130 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, TRUE);
1131 }
1132 /* }}} */
1133
1134 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1135 PHP_FUNCTION(tidy_diagnose)
1136 {
1137 TIDY_FETCH_OBJECT;
1138
1139 if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1140 tidy_doc_update_properties(obj);
1141 RETURN_TRUE;
1142 }
1143
1144 RETURN_FALSE;
1145 }
1146 /* }}} */
1147
1148 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1149 PHP_FUNCTION(tidy_get_release)
1150 {
1151 if (zend_parse_parameters_none() == FAILURE) {
1152 RETURN_THROWS();
1153 }
1154
1155 #ifdef HAVE_TIDYRELEASEDATE
1156 RETURN_STRING((char *)tidyReleaseDate());
1157 #else
1158 RETURN_STRING((char *)"unknown");
1159 #endif
1160 }
1161 /* }}} */
1162
1163
1164 #ifdef HAVE_TIDYOPTGETDOC
1165 /* {{{ Returns the documentation for the given option name */
PHP_FUNCTION(tidy_get_opt_doc)1166 PHP_FUNCTION(tidy_get_opt_doc)
1167 {
1168 PHPTidyObj *obj;
1169 char *optval, *optname;
1170 size_t optname_len;
1171 TidyOption opt;
1172 zval *object;
1173
1174 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1175 RETURN_THROWS();
1176 }
1177
1178 obj = Z_TIDY_P(object);
1179
1180 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1181
1182 if (!opt) {
1183 zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1184 RETURN_THROWS();
1185 }
1186
1187 if ( (optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt)) ) {
1188 RETURN_STRING(optval);
1189 }
1190
1191 RETURN_FALSE;
1192 }
1193 /* }}} */
1194 #endif
1195
1196
1197 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1198 PHP_FUNCTION(tidy_get_config)
1199 {
1200 TidyIterator itOpt;
1201 char *opt_name;
1202 void *opt_value;
1203 TidyOptionType optt;
1204
1205 TIDY_FETCH_OBJECT;
1206
1207 itOpt = tidyGetOptionList(obj->ptdoc->doc);
1208
1209 array_init(return_value);
1210
1211 while (itOpt) {
1212 TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1213
1214 opt_name = (char *)tidyOptGetName(opt);
1215 opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1216 switch (optt) {
1217 case TidyString:
1218 add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1219 break;
1220
1221 case TidyInteger:
1222 add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1223 break;
1224
1225 case TidyBoolean:
1226 add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1227 break;
1228 }
1229 }
1230
1231 return;
1232 }
1233 /* }}} */
1234
1235 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1236 PHP_FUNCTION(tidy_get_status)
1237 {
1238 TIDY_FETCH_OBJECT;
1239
1240 RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1241 }
1242 /* }}} */
1243
1244 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1245 PHP_FUNCTION(tidy_get_html_ver)
1246 {
1247 TIDY_FETCH_INITIALIZED_OBJECT;
1248
1249 RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1250 }
1251 /* }}} */
1252
1253 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1254 PHP_FUNCTION(tidy_is_xhtml)
1255 {
1256 TIDY_FETCH_INITIALIZED_OBJECT;
1257
1258 RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1259 }
1260 /* }}} */
1261
1262 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1263 PHP_FUNCTION(tidy_is_xml)
1264 {
1265 TIDY_FETCH_INITIALIZED_OBJECT;
1266
1267 RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1268 }
1269 /* }}} */
1270
1271 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1272 PHP_FUNCTION(tidy_error_count)
1273 {
1274 TIDY_FETCH_OBJECT;
1275
1276 RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1277 }
1278 /* }}} */
1279
1280 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1281 PHP_FUNCTION(tidy_warning_count)
1282 {
1283 TIDY_FETCH_OBJECT;
1284
1285 RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1286 }
1287 /* }}} */
1288
1289 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1290 PHP_FUNCTION(tidy_access_count)
1291 {
1292 TIDY_FETCH_OBJECT;
1293
1294 RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1295 }
1296 /* }}} */
1297
1298 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1299 PHP_FUNCTION(tidy_config_count)
1300 {
1301 TIDY_FETCH_OBJECT;
1302
1303 RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1304 }
1305 /* }}} */
1306
1307 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1308 PHP_FUNCTION(tidy_getopt)
1309 {
1310 PHPTidyObj *obj;
1311 char *optname;
1312 void *optval;
1313 size_t optname_len;
1314 TidyOption opt;
1315 TidyOptionType optt;
1316 zval *object;
1317
1318 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1319 RETURN_THROWS();
1320 }
1321
1322 obj = Z_TIDY_P(object);
1323
1324 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1325
1326 if (!opt) {
1327 zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1328 RETURN_THROWS();
1329 }
1330
1331 optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1332 switch (optt) {
1333 case TidyString:
1334 RETVAL_STR((zend_string*)optval);
1335 return;
1336
1337 case TidyInteger:
1338 RETURN_LONG((zend_long)optval);
1339 break;
1340
1341 case TidyBoolean:
1342 if (optval) {
1343 RETURN_TRUE;
1344 } else {
1345 RETURN_FALSE;
1346 }
1347 break;
1348
1349 default:
1350 php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1351 break;
1352 }
1353
1354 RETURN_FALSE;
1355 }
1356 /* }}} */
1357
PHP_METHOD(tidy,__construct)1358 PHP_METHOD(tidy, __construct)
1359 {
1360 char *enc = NULL;
1361 size_t enc_len = 0;
1362 zend_bool use_include_path = 0;
1363 HashTable *options_ht = NULL;
1364 zend_string *contents, *inputfile = NULL, *options_str = NULL;
1365 PHPTidyObj *obj;
1366
1367 ZEND_PARSE_PARAMETERS_START(0, 4)
1368 Z_PARAM_OPTIONAL
1369 Z_PARAM_PATH_STR_OR_NULL(inputfile)
1370 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1371 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1372 Z_PARAM_BOOL(use_include_path)
1373 ZEND_PARSE_PARAMETERS_END();
1374
1375 TIDY_SET_CONTEXT;
1376 obj = Z_TIDY_P(object);
1377
1378 if (inputfile) {
1379 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1380 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1381 return;
1382 }
1383
1384 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1385 zend_value_error("Input string is too long");
1386 RETURN_THROWS();
1387 }
1388
1389 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1390
1391 php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1392
1393 zend_string_release_ex(contents, 0);
1394 }
1395 }
1396
PHP_METHOD(tidy,parseFile)1397 PHP_METHOD(tidy, parseFile)
1398 {
1399 char *enc = NULL;
1400 size_t enc_len = 0;
1401 zend_bool use_include_path = 0;
1402 HashTable *options_ht = NULL;
1403 zend_string *inputfile, *contents, *options_str = NULL;
1404 PHPTidyObj *obj;
1405
1406 ZEND_PARSE_PARAMETERS_START(1, 4)
1407 Z_PARAM_PATH_STR(inputfile)
1408 Z_PARAM_OPTIONAL
1409 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1410 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1411 Z_PARAM_BOOL(use_include_path)
1412 ZEND_PARSE_PARAMETERS_END();
1413
1414 TIDY_SET_CONTEXT;
1415 obj = Z_TIDY_P(object);
1416
1417 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1418 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1419 RETURN_FALSE;
1420 }
1421
1422 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1423 zend_value_error("Input string is too long");
1424 RETURN_THROWS();
1425 }
1426
1427 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1428
1429 if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1430 RETVAL_FALSE;
1431 } else {
1432 RETVAL_TRUE;
1433 }
1434
1435 zend_string_release_ex(contents, 0);
1436 }
1437
PHP_METHOD(tidy,parseString)1438 PHP_METHOD(tidy, parseString)
1439 {
1440 char *enc = NULL;
1441 size_t enc_len = 0;
1442 HashTable *options_ht = NULL;
1443 PHPTidyObj *obj;
1444 zend_string *input, *options_str = NULL;
1445
1446 ZEND_PARSE_PARAMETERS_START(1, 3)
1447 Z_PARAM_STR(input)
1448 Z_PARAM_OPTIONAL
1449 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1450 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1451 ZEND_PARSE_PARAMETERS_END();
1452
1453 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1454 zend_argument_value_error(1, "is too long");
1455 RETURN_THROWS();
1456 }
1457
1458 TIDY_SET_CONTEXT;
1459 obj = Z_TIDY_P(object);
1460
1461 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1462
1463 if(php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1464 RETURN_TRUE;
1465 }
1466
1467 RETURN_FALSE;
1468 }
1469
1470
1471 /* {{{ Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1472 PHP_FUNCTION(tidy_get_root)
1473 {
1474 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1475 }
1476 /* }}} */
1477
1478 /* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1479 PHP_FUNCTION(tidy_get_html)
1480 {
1481 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1482 }
1483 /* }}} */
1484
1485 /* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1486 PHP_FUNCTION(tidy_get_head)
1487 {
1488 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1489 }
1490 /* }}} */
1491
1492 /* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1493 PHP_FUNCTION(tidy_get_body)
1494 {
1495 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1496 }
1497 /* }}} */
1498
1499 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1500 PHP_METHOD(tidyNode, hasChildren)
1501 {
1502 TIDY_FETCH_ONLY_OBJECT;
1503
1504 if (tidyGetChild(obj->node)) {
1505 RETURN_TRUE;
1506 } else {
1507 RETURN_FALSE;
1508 }
1509 }
1510 /* }}} */
1511
1512 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1513 PHP_METHOD(tidyNode, hasSiblings)
1514 {
1515 TIDY_FETCH_ONLY_OBJECT;
1516
1517 if (obj->node && tidyGetNext(obj->node)) {
1518 RETURN_TRUE;
1519 } else {
1520 RETURN_FALSE;
1521 }
1522 }
1523 /* }}} */
1524
1525 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1526 PHP_METHOD(tidyNode, isComment)
1527 {
1528 TIDY_FETCH_ONLY_OBJECT;
1529
1530 if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1531 RETURN_TRUE;
1532 } else {
1533 RETURN_FALSE;
1534 }
1535 }
1536 /* }}} */
1537
1538 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1539 PHP_METHOD(tidyNode, isHtml)
1540 {
1541 TIDY_FETCH_ONLY_OBJECT;
1542
1543 switch (tidyNodeGetType(obj->node)) {
1544 case TidyNode_Start:
1545 case TidyNode_End:
1546 case TidyNode_StartEnd:
1547 RETURN_TRUE;
1548 default:
1549 RETURN_FALSE;
1550 }
1551 }
1552 /* }}} */
1553
1554 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1555 PHP_METHOD(tidyNode, isText)
1556 {
1557 TIDY_FETCH_ONLY_OBJECT;
1558
1559 if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1560 RETURN_TRUE;
1561 } else {
1562 RETURN_FALSE;
1563 }
1564 }
1565 /* }}} */
1566
1567 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1568 PHP_METHOD(tidyNode, isJste)
1569 {
1570 TIDY_FETCH_ONLY_OBJECT;
1571
1572 if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1573 RETURN_TRUE;
1574 } else {
1575 RETURN_FALSE;
1576 }
1577 }
1578 /* }}} */
1579
1580 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1581 PHP_METHOD(tidyNode, isAsp)
1582 {
1583 TIDY_FETCH_ONLY_OBJECT;
1584
1585 if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1586 RETURN_TRUE;
1587 } else {
1588 RETURN_FALSE;
1589 }
1590 }
1591 /* }}} */
1592
1593 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1594 PHP_METHOD(tidyNode, isPhp)
1595 {
1596 TIDY_FETCH_ONLY_OBJECT;
1597
1598 if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1599 RETURN_TRUE;
1600 } else {
1601 RETURN_FALSE;
1602 }
1603 }
1604 /* }}} */
1605
1606 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1607 PHP_METHOD(tidyNode, getParent)
1608 {
1609 TidyNode parent_node;
1610 PHPTidyObj *newobj;
1611 TIDY_FETCH_ONLY_OBJECT;
1612
1613 parent_node = tidyGetParent(obj->node);
1614 if(parent_node) {
1615 tidy_instanciate(tidy_ce_node, return_value);
1616 newobj = Z_TIDY_P(return_value);
1617 newobj->node = parent_node;
1618 newobj->type = is_node;
1619 newobj->ptdoc = obj->ptdoc;
1620 newobj->ptdoc->ref_count++;
1621 tidy_add_default_properties(newobj, is_node);
1622 } else {
1623 ZVAL_NULL(return_value);
1624 }
1625 }
1626 /* }}} */
1627
1628
1629 /* {{{ __constructor for tidyNode. */
PHP_METHOD(tidyNode,__construct)1630 PHP_METHOD(tidyNode, __construct)
1631 {
1632 zend_throw_error(NULL, "You should not create a tidyNode manually");
1633 }
1634 /* }}} */
1635
_php_tidy_register_nodetypes(INIT_FUNC_ARGS)1636 static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
1637 {
1638 TIDY_NODE_CONST(ROOT, Root);
1639 TIDY_NODE_CONST(DOCTYPE, DocType);
1640 TIDY_NODE_CONST(COMMENT, Comment);
1641 TIDY_NODE_CONST(PROCINS, ProcIns);
1642 TIDY_NODE_CONST(TEXT, Text);
1643 TIDY_NODE_CONST(START, Start);
1644 TIDY_NODE_CONST(END, End);
1645 TIDY_NODE_CONST(STARTEND, StartEnd);
1646 TIDY_NODE_CONST(CDATA, CDATA);
1647 TIDY_NODE_CONST(SECTION, Section);
1648 TIDY_NODE_CONST(ASP, Asp);
1649 TIDY_NODE_CONST(JSTE, Jste);
1650 TIDY_NODE_CONST(PHP, Php);
1651 TIDY_NODE_CONST(XMLDECL, XmlDecl);
1652 }
1653
_php_tidy_register_tags(INIT_FUNC_ARGS)1654 static void _php_tidy_register_tags(INIT_FUNC_ARGS)
1655 {
1656 TIDY_TAG_CONST(UNKNOWN);
1657 TIDY_TAG_CONST(A);
1658 TIDY_TAG_CONST(ABBR);
1659 TIDY_TAG_CONST(ACRONYM);
1660 TIDY_TAG_CONST(ADDRESS);
1661 TIDY_TAG_CONST(ALIGN);
1662 TIDY_TAG_CONST(APPLET);
1663 TIDY_TAG_CONST(AREA);
1664 TIDY_TAG_CONST(B);
1665 TIDY_TAG_CONST(BASE);
1666 TIDY_TAG_CONST(BASEFONT);
1667 TIDY_TAG_CONST(BDO);
1668 TIDY_TAG_CONST(BGSOUND);
1669 TIDY_TAG_CONST(BIG);
1670 TIDY_TAG_CONST(BLINK);
1671 TIDY_TAG_CONST(BLOCKQUOTE);
1672 TIDY_TAG_CONST(BODY);
1673 TIDY_TAG_CONST(BR);
1674 TIDY_TAG_CONST(BUTTON);
1675 TIDY_TAG_CONST(CAPTION);
1676 TIDY_TAG_CONST(CENTER);
1677 TIDY_TAG_CONST(CITE);
1678 TIDY_TAG_CONST(CODE);
1679 TIDY_TAG_CONST(COL);
1680 TIDY_TAG_CONST(COLGROUP);
1681 TIDY_TAG_CONST(COMMENT);
1682 TIDY_TAG_CONST(DD);
1683 TIDY_TAG_CONST(DEL);
1684 TIDY_TAG_CONST(DFN);
1685 TIDY_TAG_CONST(DIR);
1686 TIDY_TAG_CONST(DIV);
1687 TIDY_TAG_CONST(DL);
1688 TIDY_TAG_CONST(DT);
1689 TIDY_TAG_CONST(EM);
1690 TIDY_TAG_CONST(EMBED);
1691 TIDY_TAG_CONST(FIELDSET);
1692 TIDY_TAG_CONST(FONT);
1693 TIDY_TAG_CONST(FORM);
1694 TIDY_TAG_CONST(FRAME);
1695 TIDY_TAG_CONST(FRAMESET);
1696 TIDY_TAG_CONST(H1);
1697 TIDY_TAG_CONST(H2);
1698 TIDY_TAG_CONST(H3);
1699 TIDY_TAG_CONST(H4);
1700 TIDY_TAG_CONST(H5);
1701 TIDY_TAG_CONST(H6);
1702 TIDY_TAG_CONST(HEAD);
1703 TIDY_TAG_CONST(HR);
1704 TIDY_TAG_CONST(HTML);
1705 TIDY_TAG_CONST(I);
1706 TIDY_TAG_CONST(IFRAME);
1707 TIDY_TAG_CONST(ILAYER);
1708 TIDY_TAG_CONST(IMG);
1709 TIDY_TAG_CONST(INPUT);
1710 TIDY_TAG_CONST(INS);
1711 TIDY_TAG_CONST(ISINDEX);
1712 TIDY_TAG_CONST(KBD);
1713 TIDY_TAG_CONST(KEYGEN);
1714 TIDY_TAG_CONST(LABEL);
1715 TIDY_TAG_CONST(LAYER);
1716 TIDY_TAG_CONST(LEGEND);
1717 TIDY_TAG_CONST(LI);
1718 TIDY_TAG_CONST(LINK);
1719 TIDY_TAG_CONST(LISTING);
1720 TIDY_TAG_CONST(MAP);
1721 TIDY_TAG_CONST(MARQUEE);
1722 TIDY_TAG_CONST(MENU);
1723 TIDY_TAG_CONST(META);
1724 TIDY_TAG_CONST(MULTICOL);
1725 TIDY_TAG_CONST(NOBR);
1726 TIDY_TAG_CONST(NOEMBED);
1727 TIDY_TAG_CONST(NOFRAMES);
1728 TIDY_TAG_CONST(NOLAYER);
1729 TIDY_TAG_CONST(NOSAVE);
1730 TIDY_TAG_CONST(NOSCRIPT);
1731 TIDY_TAG_CONST(OBJECT);
1732 TIDY_TAG_CONST(OL);
1733 TIDY_TAG_CONST(OPTGROUP);
1734 TIDY_TAG_CONST(OPTION);
1735 TIDY_TAG_CONST(P);
1736 TIDY_TAG_CONST(PARAM);
1737 TIDY_TAG_CONST(PLAINTEXT);
1738 TIDY_TAG_CONST(PRE);
1739 TIDY_TAG_CONST(Q);
1740 TIDY_TAG_CONST(RB);
1741 TIDY_TAG_CONST(RBC);
1742 TIDY_TAG_CONST(RP);
1743 TIDY_TAG_CONST(RT);
1744 TIDY_TAG_CONST(RTC);
1745 TIDY_TAG_CONST(RUBY);
1746 TIDY_TAG_CONST(S);
1747 TIDY_TAG_CONST(SAMP);
1748 TIDY_TAG_CONST(SCRIPT);
1749 TIDY_TAG_CONST(SELECT);
1750 TIDY_TAG_CONST(SERVER);
1751 TIDY_TAG_CONST(SERVLET);
1752 TIDY_TAG_CONST(SMALL);
1753 TIDY_TAG_CONST(SPACER);
1754 TIDY_TAG_CONST(SPAN);
1755 TIDY_TAG_CONST(STRIKE);
1756 TIDY_TAG_CONST(STRONG);
1757 TIDY_TAG_CONST(STYLE);
1758 TIDY_TAG_CONST(SUB);
1759 TIDY_TAG_CONST(SUP);
1760 TIDY_TAG_CONST(TABLE);
1761 TIDY_TAG_CONST(TBODY);
1762 TIDY_TAG_CONST(TD);
1763 TIDY_TAG_CONST(TEXTAREA);
1764 TIDY_TAG_CONST(TFOOT);
1765 TIDY_TAG_CONST(TH);
1766 TIDY_TAG_CONST(THEAD);
1767 TIDY_TAG_CONST(TITLE);
1768 TIDY_TAG_CONST(TR);
1769 TIDY_TAG_CONST(TT);
1770 TIDY_TAG_CONST(U);
1771 TIDY_TAG_CONST(UL);
1772 TIDY_TAG_CONST(VAR);
1773 TIDY_TAG_CONST(WBR);
1774 TIDY_TAG_CONST(XMP);
1775 # ifdef HAVE_TIDYBUFFIO_H
1776 TIDY_TAG_CONST(ARTICLE);
1777 TIDY_TAG_CONST(ASIDE);
1778 TIDY_TAG_CONST(AUDIO);
1779 TIDY_TAG_CONST(BDI);
1780 TIDY_TAG_CONST(CANVAS);
1781 TIDY_TAG_CONST(COMMAND);
1782 TIDY_TAG_CONST(DATALIST);
1783 TIDY_TAG_CONST(DETAILS);
1784 TIDY_TAG_CONST(DIALOG);
1785 TIDY_TAG_CONST(FIGCAPTION);
1786 TIDY_TAG_CONST(FIGURE);
1787 TIDY_TAG_CONST(FOOTER);
1788 TIDY_TAG_CONST(HEADER);
1789 TIDY_TAG_CONST(HGROUP);
1790 TIDY_TAG_CONST(MAIN);
1791 TIDY_TAG_CONST(MARK);
1792 TIDY_TAG_CONST(MENUITEM);
1793 TIDY_TAG_CONST(METER);
1794 TIDY_TAG_CONST(NAV);
1795 TIDY_TAG_CONST(OUTPUT);
1796 TIDY_TAG_CONST(PROGRESS);
1797 TIDY_TAG_CONST(SECTION);
1798 TIDY_TAG_CONST(SOURCE);
1799 TIDY_TAG_CONST(SUMMARY);
1800 TIDY_TAG_CONST(TEMPLATE);
1801 TIDY_TAG_CONST(TIME);
1802 TIDY_TAG_CONST(TRACK);
1803 TIDY_TAG_CONST(VIDEO);
1804 # endif
1805 }
1806
1807 #endif
1808