1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: John Coggeshall <john@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include "php.h"
22 #include "php_tidy.h"
23
24 #ifdef HAVE_TIDY
25
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40
41 #include "tidy_arginfo.h"
42
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47
48 /* {{{ ext/tidy macros */
/* Overwrite the last byte of a non-empty TidyBuffer with a NUL terminator.
 * Buffers whose size is 0 are left untouched. */
#define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)
50
/* Declare `object` and initialise it from getThis(). */
#define TIDY_SET_CONTEXT \
	zval *object = getThis();

/* Declare `obj` and `object`, parse a single tidy_ce_doc object argument
 * (or $this for method calls) and resolve it to its PHPTidyObj.
 * Expands to declarations plus statements, so it must appear where both are
 * allowed; leaves the enclosing function via RETURN_THROWS() on parse failure. */
#define TIDY_FETCH_OBJECT	\
	PHPTidyObj *obj;	\
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) == FAILURE) {	\
		RETURN_THROWS(); \
	}	\
	obj = Z_TIDY_P(object);	\

/* Same as TIDY_FETCH_OBJECT, but additionally throws an Error and returns
 * when the document's `initialized` flag is not set. */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}

/* Variant for calls that accept no arguments: `object` comes from getThis()
 * and the parameter list is required to be empty. */
#define TIDY_FETCH_ONLY_OBJECT	\
	PHPTidyObj *obj;	\
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() == FAILURE) {	\
		RETURN_THROWS(); \
	}	\
	obj = Z_TIDY_P(object);	\
76
/* Apply user configuration to _doc: an options array takes precedence,
 * otherwise a configuration file path is checked against open_basedir and
 * loaded.
 * NOTE: TIDY_OPEN_BASE_DIR_CHECK expands to RETURN_FALSE, so a rejected path
 * leaves the enclosing function immediately, without running any cleanup the
 * caller may still owe. */
#define TIDY_APPLY_CONFIG(_doc, _val_str, _val_ht) \
	if (_val_ht) { \
		_php_tidy_apply_config_array(_doc, _val_ht); \
	} else if (_val_str) { \
		TIDY_OPEN_BASE_DIR_CHECK(ZSTR_VAL(_val_str)); \
		php_tidy_load_config(_doc, ZSTR_VAL(_val_str)); \
	}
84
85
/* Register TIDY_TAG_<tag> as a persistent, case-sensitive integer constant with the value TidyTag_<tag>. */
#define TIDY_TAG_CONST(tag) REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)
/* Register TIDY_NODETYPE_<name> as a persistent, case-sensitive integer constant with the value TidyNode_<type>. */
#define TIDY_NODE_CONST(name, type) REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)
88
/*
 * ADD_PROPERTY_* helpers: store one value in the hash table _table under the
 * stringified identifier _key, replacing any existing entry.
 * Each expansion declares a local zval named `tmp`; argument expressions
 * should therefore not rely on an outer variable of that name.
 */

/* Store a copy of a NUL-terminated string; a NULL pointer becomes "". */
#define ADD_PROPERTY_STRING(_table, _key, _string) \
	{ \
		zval tmp; \
		if (_string) { \
			ZVAL_STRING(&tmp, (char *)_string); \
		} else { \
			ZVAL_EMPTY_STRING(&tmp); \
		} \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/* Store a copy of _len bytes of _string; a NULL pointer becomes "". */
#define ADD_PROPERTY_STRINGL(_table, _key, _string, _len) \
   { \
       zval tmp; \
       if (_string) { \
           ZVAL_STRINGL(&tmp, (char *)_string, _len); \
       } else { \
           ZVAL_EMPTY_STRING(&tmp); \
       } \
       zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
   }

/* Store an integer value. */
#define ADD_PROPERTY_LONG(_table, _key, _long) \
	{ \
		zval tmp; \
		ZVAL_LONG(&tmp, _long); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/* Store NULL. */
#define ADD_PROPERTY_NULL(_table, _key) \
	{ \
		zval tmp; \
		ZVAL_NULL(&tmp); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}

/* Store a boolean value. */
#define ADD_PROPERTY_BOOL(_table, _key, _bool) \
    { \
		zval tmp; \
		ZVAL_BOOL(&tmp, _bool); \
		zend_hash_str_update(_table, #_key, sizeof(#_key) - 1, &tmp); \
	}
131
/* Make the enclosing PHP function return false when php_check_open_basedir()
 * reports a non-zero result for `filename`. No cleanup is performed. */
#define TIDY_OPEN_BASE_DIR_CHECK(filename) \
if (php_check_open_basedir(filename)) { \
	RETURN_FALSE; \
} \

/* Load the file named by the tidy.default_config INI setting into _doc,
 * provided the setting is a non-empty string. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		php_tidy_load_config(_doc, TG(default_config)); \
	}
141 /* }}} */
142
143 /* {{{ ext/tidy structs */
typedef struct _PHPTidyDoc PHPTidyDoc;
typedef struct _PHPTidyObj PHPTidyObj;

/* Kind of PHP object a PHPTidyObj backs. */
typedef enum {
	is_node,
	is_doc
} tidy_obj_type;

/* Well-known nodes that php_tidy_create_node() can look up in a document. */
typedef enum {
	is_root_node,
	is_html_node,
	is_head_node,
	is_body_node
} tidy_base_nodetypes;

/* A libtidy document together with its error buffer. One instance may be
 * shared by a document object and the node objects created from it. */
struct _PHPTidyDoc {
	TidyDoc			doc;
	TidyBuffer		*errbuf;         /* receives libtidy diagnostics (see tidySetErrorBuffer calls) */
	unsigned int	ref_count;       /* number of PHPTidyObj instances pointing at this struct */
	unsigned int    initialized:1;   /* set by php_tidy_parse_string() before parsing input */
};

/* Per-object storage; `std` is the embedded zend_object and comes last. */
struct _PHPTidyObj {
	TidyNode		node;            /* node wrapped by this object (assigned for node objects) */
	tidy_obj_type	type;
	PHPTidyDoc		*ptdoc;          /* owning document data; may be NULL for a bare node object */
	zend_object		std;
};
172
php_tidy_fetch_object(zend_object * obj)173 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
174 return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
175 }
176
/* Resolve a zval holding a tidy/tidyNode object to its PHPTidyObj. */
#define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
178 /* }}} */
179
180 /* {{{ ext/tidy prototypes */
/* Input loading */
static zend_string *php_tidy_file_to_mem(char *, bool);
/* Object lifecycle and handlers */
static void tidy_object_free_storage(zend_object *);
static zend_object *tidy_object_new_node(zend_class_entry *);
static zend_object *tidy_object_new_doc(zend_class_entry *);
static zval * tidy_instanciate(zend_class_entry *, zval *);
static int tidy_doc_cast_handler(zend_object *, zval *, int);
static int tidy_node_cast_handler(zend_object *, zval *, int);
/* Property population and node creation */
static void tidy_doc_update_properties(PHPTidyObj *);
static void tidy_add_default_properties(PHPTidyObj *, tidy_obj_type);
static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
/* Configuration handling */
static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
/* Constant registration */
static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
static void _php_tidy_register_tags(INIT_FUNC_ARGS);
/* tidy.clean_output INI handler and the ob_tidyhandler output handler */
static PHP_INI_MH(php_tidy_set_clean_output);
static void php_tidy_clean_output_start(const char *name, size_t name_len);
static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
static int php_tidy_output_handler(void **nothing, php_output_context *output_context);

/* Module lifecycle callbacks */
static PHP_MINIT_FUNCTION(tidy);
static PHP_MSHUTDOWN_FUNCTION(tidy);
static PHP_RINIT_FUNCTION(tidy);
static PHP_RSHUTDOWN_FUNCTION(tidy);
static PHP_MINFO_FUNCTION(tidy);
206
/* Module globals, accessed through TG(). */
ZEND_DECLARE_MODULE_GLOBALS(tidy)

/* INI settings:
 *  - tidy.default_config: path of a configuration file applied to every new document (system-level only)
 *  - tidy.clean_output:   whether output is passed through the tidy output handler (changeable by user code) */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
STD_PHP_INI_ENTRY("tidy.clean_output",		"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
PHP_INI_END()

/* Class entries for the document and node classes (assigned in MINIT). */
static zend_class_entry *tidy_ce_doc, *tidy_ce_node;

/* Object handler tables for the two classes (filled in MINIT). */
static zend_object_handlers tidy_object_handlers_doc;
static zend_object_handlers tidy_object_handlers_node;
218
/* Module descriptor for the tidy extension. */
zend_module_entry tidy_module_entry = {
	STANDARD_MODULE_HEADER,
	"tidy",                     /* extension name */
	ext_functions,              /* function table; presumably provided by tidy_arginfo.h - confirm */
	PHP_MINIT(tidy),
	PHP_MSHUTDOWN(tidy),
	PHP_RINIT(tidy),
	PHP_RSHUTDOWN(tidy),
	PHP_MINFO(tidy),
	PHP_TIDY_VERSION,
	PHP_MODULE_GLOBALS(tidy),
	NULL,
	NULL,
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};

/* Entry point used when the extension is built as a loadable module. */
#ifdef COMPILE_DL_TIDY
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(tidy)
#endif
242
243 static void* TIDY_CALL php_tidy_malloc(size_t len)
244 {
245 return emalloc(len);
246 }
247
php_tidy_realloc(void * buf,size_t len)248 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
249 {
250 return erealloc(buf, len);
251 }
252
php_tidy_free(void * buf)253 static void TIDY_CALL php_tidy_free(void *buf)
254 {
255 efree(buf);
256 }
257
php_tidy_panic(ctmbstr msg)258 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
259 {
260 php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
261 }
262
/*
 * Load a libtidy configuration file into `doc`.
 * A negative tidyLoadConfig() result raises a warning, a positive one a
 * notice; zero is silent.
 */
static void php_tidy_load_config(TidyDoc doc, const char *path)
{
	int rc = tidyLoadConfig(doc, path);

	if (rc == 0) {
		return;
	}

	if (rc < 0) {
		php_error_docref(NULL, E_WARNING, "Could not load the Tidy configuration file \"%s\"", path);
		return;
	}

	php_error_docref(NULL, E_NOTICE, "There were errors while parsing the Tidy configuration file \"%s\"", path);
}
272
/*
 * Set one libtidy option on `doc` from a PHP value.
 *
 * The value is converted according to the option's declared type: string
 * options use the string form of `value`, integer and boolean options its
 * integer form. Unknown and read-only options raise a warning.
 *
 * Returns SUCCESS when libtidy accepted the value, FAILURE otherwise.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	int status = FAILURE;
	TidyOption option = tidyGetOptionByName(doc, optname);

	if (!option) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *tmp_holder;
			zend_string *text = zval_get_tmp_string(value, &tmp_holder);

			if (tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text))) {
				status = SUCCESS;
			}
			/* The temporary string is released on both outcomes. */
			zend_tmp_string_release(tmp_holder);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				status = SUCCESS;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				status = SUCCESS;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return status;
}
320
/*
 * Shared implementation of tidy_repair_string() and tidy_repair_file().
 *
 * Parses the input (a string, or the contents of a file when `is_file` is
 * true) with a temporary libtidy document, runs clean-and-repair and returns
 * the serialized markup. Returns false when the file cannot be read, when an
 * open_basedir restriction rejects the configuration file, or when parsing
 * or repairing fails.
 *
 * Every exit after the document has been created goes through the `cleanup`
 * label so the document, the error buffer and (for files) the loaded
 * contents are always released.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;

	if (is_file) {
		bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		data = arg1;
	}

	/* libtidy buffers take a 32-bit length. */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (is_file) {
			/* The file contents are owned by this function. */
			zend_string_release_ex(data, 0);
		}
		zend_argument_value_error(1, "is too long");
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		if (is_file) {
			zend_string_release_ex(data, 0);
		}
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
		/* E_ERROR is expected to abort the request; return regardless so the
		 * handles released above are never touched again. */
		return;
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/* Same logic as TIDY_APPLY_CONFIG, spelled out so that an open_basedir
	 * rejection can release the resources acquired above. */
	if (config_ht) {
		_php_tidy_apply_config_array(doc, config_ht);
	} else if (config_str) {
		if (php_check_open_basedir(ZSTR_VAL(config_str))) {
			RETVAL_FALSE;
			goto cleanup;
		}
		php_tidy_load_config(doc, ZSTR_VAL(config_str));
	}

	if(enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
			/* NOTE(review): processing continues, so this value is replaced
			 * by the parse result below. Kept as-is for compatibility. */
			RETVAL_FALSE;
		}
	}

	{
		TidyBuffer buf;

		tidyBufInit(&buf);
		/* The buffer only borrows the string's memory; it is never freed. */
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else {
			if (tidyCleanAndRepair(doc) >= 0) {
				TidyBuffer output;
				tidyBufInit(&output);

				tidySaveBuffer (doc, &output);
				FIX_BUFFER(&output);
				RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
				tidyBufFree(&output);
			} else {
				RETVAL_FALSE;
			}
		}
	}

cleanup:
	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
417
php_tidy_file_to_mem(char * filename,bool use_include_path)418 static zend_string *php_tidy_file_to_mem(char *filename, bool use_include_path)
419 {
420 php_stream *stream;
421 zend_string *data = NULL;
422
423 if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
424 return NULL;
425 }
426 if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
427 data = ZSTR_EMPTY_ALLOC();
428 }
429 php_stream_close(stream);
430
431 return data;
432 }
433
/*
 * free_obj handler for tidy and tidyNode objects.
 *
 * Destroys the standard object part, then drops this object's reference to
 * the shared document data; the last reference releases the error buffer,
 * the libtidy document and the PHPTidyDoc itself.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *self = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&self->std);

	shared = self->ptdoc;
	if (!shared) {
		return;
	}

	shared->ref_count--;

	/* ref_count is unsigned, so "no references left" means exactly zero. */
	if (shared->ref_count == 0) {
		tidyBufFree(shared->errbuf);
		efree(shared->errbuf);
		tidyRelease(shared->doc);
		efree(shared);
	}
}
451
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)452 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
453 {
454 PHPTidyObj *intern;
455
456 intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
457 zend_object_std_init(&intern->std, class_type);
458 object_properties_init(&intern->std, class_type);
459
460 switch(objtype) {
461 case is_node:
462 break;
463
464 case is_doc:
465 intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
466 intern->ptdoc->doc = tidyCreate();
467 intern->ptdoc->ref_count = 1;
468 intern->ptdoc->initialized = 0;
469 intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
470 tidyBufInit(intern->ptdoc->errbuf);
471
472 if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
473 tidyBufFree(intern->ptdoc->errbuf);
474 efree(intern->ptdoc->errbuf);
475 tidyRelease(intern->ptdoc->doc);
476 efree(intern->ptdoc);
477 efree(intern);
478 php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
479 }
480
481 tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
482 tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
483
484 TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
485
486 tidy_add_default_properties(intern, is_doc);
487 break;
488 }
489
490 intern->std.handlers = handlers;
491
492 return &intern->std;
493 }
494
tidy_object_new_node(zend_class_entry * class_type)495 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
496 {
497 return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
498 }
499
tidy_object_new_doc(zend_class_entry * class_type)500 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
501 {
502 return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
503 }
504
/* Initialise `object` as a new instance of class `pce` and hand the same zval back. */
static zval * tidy_instanciate(zend_class_entry *pce, zval *object)
{
	zval *instance = object;

	object_init_ex(instance, pce);

	return instance;
}
510
/*
 * cast_object handler for document objects.
 *
 * Numeric casts yield 0, boolean casts yield true and string casts yield the
 * serialized document (or "" when libtidy produced no output). Any other
 * target type is rejected with FAILURE.
 */
static int tidy_doc_cast_handler(zend_object *in, zval *out, int type)
{
	if (type == IS_LONG || type == _IS_NUMBER) {
		ZVAL_LONG(out, 0);
		return SUCCESS;
	}

	if (type == IS_DOUBLE) {
		ZVAL_DOUBLE(out, 0);
		return SUCCESS;
	}

	if (type == _IS_BOOL) {
		ZVAL_TRUE(out);
		return SUCCESS;
	}

	if (type == IS_STRING) {
		TidyBuffer rendered;
		PHPTidyObj *self = php_tidy_fetch_object(in);

		tidyBufInit(&rendered);
		tidySaveBuffer (self->ptdoc->doc, &rendered);
		if (!rendered.size) {
			ZVAL_EMPTY_STRING(out);
		} else {
			/* The last byte of the buffer is not part of the string. */
			ZVAL_STRINGL(out, (char *) rendered.bp, rendered.size-1);
		}
		tidyBufFree(&rendered);
		return SUCCESS;
	}

	return FAILURE;
}
548
/*
 * cast_object handler for node objects.
 *
 * Numeric casts yield 0, boolean casts yield true and string casts yield the
 * node's markup as produced by tidyNodeGetText(). A node without document
 * data, or a node for which libtidy produces no text, casts to "". Any other
 * target type is rejected with FAILURE.
 */
static int tidy_node_cast_handler(zend_object *in, zval *out, int type)
{
	TidyBuffer buf;
	PHPTidyObj *obj;

	switch(type) {
		case IS_LONG:
		case _IS_NUMBER:
			ZVAL_LONG(out, 0);
			break;

		case IS_DOUBLE:
			ZVAL_DOUBLE(out, 0);
			break;

		case _IS_BOOL:
			ZVAL_TRUE(out);
			break;

		case IS_STRING:
			obj = php_tidy_fetch_object(in);
			tidyBufInit(&buf);
			if (obj->ptdoc) {
				tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
			}
			/* An empty buffer must not reach the size-1 computation: the
			 * unsigned subtraction would wrap to a huge length. */
			if (buf.size) {
				ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
			} else {
				ZVAL_EMPTY_STRING(out);
			}
			tidyBufFree(&buf);
			break;

		default:
			return FAILURE;
	}

	return SUCCESS;
}
586
/*
 * Refresh the `value` and `errorBuffer` properties of a document object.
 *
 * `value` receives the serialized document and `errorBuffer` the collected
 * diagnostics. A property is only written when its source buffer is
 * non-empty; otherwise the existing property is left alone.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer rendered;
	TidyBuffer *diagnostics;
	zval prop;

	tidyBufInit(&rendered);
	tidySaveBuffer (obj->ptdoc->doc, &rendered);

	if (rendered.size != 0) {
		if (obj->std.properties == NULL) {
			rebuild_object_properties(&obj->std);
		}
		ZVAL_STRINGL(&prop, (char*)rendered.bp, rendered.size-1);
		zend_hash_str_update(obj->std.properties, "value", sizeof("value") - 1, &prop);
	}

	tidyBufFree(&rendered);

	diagnostics = obj->ptdoc->errbuf;
	if (diagnostics->size != 0) {
		if (obj->std.properties == NULL) {
			rebuild_object_properties(&obj->std);
		}
		ZVAL_STRINGL(&prop, (char*)diagnostics->bp, diagnostics->size-1);
		zend_hash_str_update(obj->std.properties, "errorBuffer", sizeof("errorBuffer") - 1, &prop);
	}
}
614
/*
 * Populate the property table of a freshly created object.
 *
 * Document objects get NULL `errorBuffer` and `value` properties.
 *
 * Node objects get `value`, `name`, `type`, `line`, `column` and
 * `proprietary`, plus `id` unless the node is a root, doctype, text or
 * comment node. `attribute` is an array of name => value pairs (NULL when
 * the node has no attributes) and `child` is an array of node objects built
 * recursively (NULL when the node has no children). Each child object shares
 * the parent's document data and takes a reference on it.
 */
static void tidy_add_default_properties(PHPTidyObj *obj, tidy_obj_type type)
{
	TidyBuffer text;
	TidyAttr attr;
	TidyNode child_node;
	zval attribute, children, child_zv;
	PHPTidyObj *child;

	if (type == is_doc) {
		if (!obj->std.properties) {
			rebuild_object_properties(&obj->std);
		}
		ADD_PROPERTY_NULL(obj->std.properties, errorBuffer);
		ADD_PROPERTY_NULL(obj->std.properties, value);
		return;
	}

	if (type != is_node) {
		return;
	}

	if (!obj->std.properties) {
		rebuild_object_properties(&obj->std);
	}

	/* Scalar properties describing the node itself. */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);
	ADD_PROPERTY_STRINGL(obj->std.properties, value, text.bp, text.size ? text.size-1 : 0);
	tidyBufFree(&text);

	ADD_PROPERTY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, line, tidyNodeLine(obj->node));
	ADD_PROPERTY_LONG(obj->std.properties, column, tidyNodeColumn(obj->node));
	ADD_PROPERTY_BOOL(obj->std.properties, proprietary, tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			/* No `id` property for these node types. */
			break;

		default:
			ADD_PROPERTY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
	}

	/* Attributes: only pairs with both a name and a value are recorded. */
	attr = tidyAttrFirst(obj->node);
	if (!attr) {
		ZVAL_NULL(&attribute);
	} else {
		array_init(&attribute);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *)tidyAttrName(attr);
			char *attr_value = (char *)tidyAttrValue(attr);

			if (attr_name && attr_value) {
				add_assoc_string(&attribute, attr_name, attr_value);
			}
		}
	}
	zend_hash_str_update(obj->std.properties, "attribute", sizeof("attribute") - 1, &attribute);

	/* Children: one node object per child, populated recursively. */
	child_node = tidyGetChild(obj->node);
	if (!child_node) {
		ZVAL_NULL(&children);
	} else {
		array_init(&children);

		for (; child_node; child_node = tidyGetNext(child_node)) {
			tidy_instanciate(tidy_ce_node, &child_zv);
			child = Z_TIDY_P(&child_zv);
			child->node = child_node;
			child->type = is_node;
			child->ptdoc = obj->ptdoc;
			child->ptdoc->ref_count++;

			tidy_add_default_properties(child, is_node);
			add_next_index_zval(&children, &child_zv);
		}
	}
	zend_hash_str_update(obj->std.properties, "child", sizeof("child") - 1, &children);
}
704
/*
 * Fetch the current value of a libtidy option.
 *
 * `*type` is set to the option's declared type, which tells the caller how
 * to interpret the returned pointer:
 *  - TidyString:  a newly allocated zend_string ("" when the option has no
 *                 value); the caller owns it.
 *  - TidyInteger: the integer value carried in the pointer itself.
 *  - TidyBoolean: the boolean value carried in the pointer itself.
 * NULL is returned for any other type.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	*type = tidyOptGetType(opt);

	switch (*type) {
		case TidyString: {
			char *val = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));
			if (val) {
				return (void *) zend_string_init(val, strlen(val), 0);
			} else {
				return (void *) ZSTR_EMPTY_ALLOC();
			}
		}

		case TidyInteger:
			return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));

		case TidyBoolean:
			/* Go through uintptr_t, like the integer case, so the enum value
			 * is widened explicitly before becoming a pointer. */
			return (void *) (uintptr_t) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
732
/*
 * Shared implementation for the root/html/head/body accessors.
 *
 * Looks up the requested well-known node in the document and returns it as a
 * node object sharing the document's data, or NULL when the document has no
 * such node.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *node_obj;
	TidyNode found;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_root_node:
			found = tidyGetRoot(obj->ptdoc->doc);
			break;

		case is_html_node:
			found = tidyGetHtml(obj->ptdoc->doc);
			break;

		case is_head_node:
			found = tidyGetHead(obj->ptdoc->doc);
			break;

		case is_body_node:
			found = tidyGetBody(obj->ptdoc->doc);
			break;

		EMPTY_SWITCH_DEFAULT_CASE()
	}

	if (!found) {
		RETURN_NULL();
	}

	tidy_instanciate(tidy_ce_node, return_value);
	node_obj = Z_TIDY_P(return_value);
	node_obj->node = found;
	node_obj->type = is_node;
	/* The node object shares the document data and holds a reference on it. */
	node_obj->ptdoc = obj->ptdoc;
	node_obj->ptdoc->ref_count++;

	tidy_add_default_properties(node_obj, is_node);
}
772
/*
 * Apply every string-keyed entry of `ht_options` to `doc` as a libtidy
 * option. Integer-keyed entries are ignored and failures of individual
 * options do not stop the loop; the function always returns SUCCESS.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zend_string *key;
	zval *entry;

	ZEND_HASH_FOREACH_STR_KEY_VAL(ht_options, key, entry) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), entry);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
787
/*
 * Parse `len` bytes of markup into the document owned by `obj`.
 *
 * When `enc` is non-NULL it is applied as the character encoding first; an
 * unsupported encoding raises a warning and fails before anything is
 * parsed. The document is flagged as initialized before parsing starts, and
 * on success the object's properties are refreshed.
 *
 * Returns SUCCESS or FAILURE.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	TidyBuffer input;
	PHPTidyDoc *shared = obj->ptdoc;

	if (enc != NULL && tidySetCharEncoding(shared->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	shared->initialized = 1;

	tidyBufInit(&input);
	/* The buffer borrows the caller's memory; it is not freed here. */
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(shared->doc, &input) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", shared->errbuf->bp);
		return FAILURE;
	}

	tidy_doc_update_properties(obj);

	return SUCCESS;
}
811
PHP_MINIT_FUNCTION(tidy)812 static PHP_MINIT_FUNCTION(tidy)
813 {
814 tidySetMallocCall(php_tidy_malloc);
815 tidySetReallocCall(php_tidy_realloc);
816 tidySetFreeCall(php_tidy_free);
817 tidySetPanicCall(php_tidy_panic);
818
819 REGISTER_INI_ENTRIES();
820
821 tidy_ce_doc = register_class_tidy();
822 tidy_ce_doc->create_object = tidy_object_new_doc;
823 memcpy(&tidy_object_handlers_doc, &std_object_handlers, sizeof(zend_object_handlers));
824 tidy_object_handlers_doc.clone_obj = NULL;
825
826 tidy_ce_node = register_class_tidyNode();
827 tidy_ce_node->create_object = tidy_object_new_node;
828 memcpy(&tidy_object_handlers_node, &std_object_handlers, sizeof(zend_object_handlers));
829 tidy_object_handlers_node.clone_obj = NULL;
830
831 tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
832 tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
833
834 tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
835 tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
836
837 _php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU);
838 _php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU);
839
840 php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
841
842 return SUCCESS;
843 }
844
PHP_RINIT_FUNCTION(tidy)845 static PHP_RINIT_FUNCTION(tidy)
846 {
847 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
848 ZEND_TSRMLS_CACHE_UPDATE();
849 #endif
850
851 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
852
853 return SUCCESS;
854 }
855
PHP_RSHUTDOWN_FUNCTION(tidy)856 static PHP_RSHUTDOWN_FUNCTION(tidy)
857 {
858 TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
859
860 return SUCCESS;
861 }
862
PHP_MSHUTDOWN_FUNCTION(tidy)863 static PHP_MSHUTDOWN_FUNCTION(tidy)
864 {
865 UNREGISTER_INI_ENTRIES();
866 return SUCCESS;
867 }
868
/*
 * phpinfo() section: support flag, library version (and release date when
 * the library exposes one), followed by the extension's INI entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");

#ifdef HAVE_TIDYBUFFIO_H
	{
		char *library_version = (char *)tidyLibraryVersion();

		php_info_print_table_row(2, "libTidy Version", library_version);
	}
#elif defined(HAVE_TIDYP_H)
	{
		char *library_version = (char *)tidyVersion();

		php_info_print_table_row(2, "libtidyp Version", library_version);
	}
#endif

#ifdef HAVE_TIDYRELEASEDATE
	{
		char *release_date = (char *)tidyReleaseDate();

		php_info_print_table_row(2, "libTidy Release", release_date);
	}
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
885
PHP_INI_MH(php_tidy_set_clean_output)886 static PHP_INI_MH(php_tidy_set_clean_output)
887 {
888 int status;
889 bool value;
890
891 value = zend_ini_parse_bool(new_value);
892
893 if (stage == PHP_INI_STAGE_RUNTIME) {
894 status = php_output_get_status();
895
896 if (value && (status & PHP_OUTPUT_WRITTEN)) {
897 php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
898 return FAILURE;
899 }
900 if (status & PHP_OUTPUT_SENT) {
901 php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
902 return FAILURE;
903 }
904 }
905
906 status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
907
908 if (stage == PHP_INI_STAGE_RUNTIME && value) {
909 if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
910 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
911 }
912 }
913
914 return status;
915 }
916
917 /*
918 * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
919 */
920
/* Create and start the tidy output handler under `name`, but only when clean_output is enabled. */
static void php_tidy_clean_output_start(const char *name, size_t name_len)
{
	php_output_handler *handler;

	if (!TG(clean_output)) {
		return;
	}

	handler = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
	if (handler) {
		php_output_handler_start(handler);
	}
}
929
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)930 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
931 {
932 if (chunk_size) {
933 php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
934 return NULL;
935 }
936 if (!TG(clean_output)) {
937 TG(clean_output) = 1;
938 }
939 return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
940 }
941
/*
 * Output handler behind ob_tidyhandler / tidy.clean_output.
 *
 * Only acts when clean_output is enabled and the whole output arrives in a
 * single START|FINAL invocation. The buffered output is parsed, cleaned and
 * repaired, and the result replaces the output. Returns SUCCESS when the
 * output was replaced, FAILURE otherwise.
 *
 * The temporary document and error buffer are released on every path,
 * including the "input too long" rejection.
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		doc = tidyCreate();
		tidyBufInit(&errbuf);

		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
			tidyOptSetBool(doc, TidyForceOutput, yes);
			tidyOptSetBool(doc, TidyMark, no);

			if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
				/* libtidy buffers take a 32-bit length; fall through to the
				 * cleanup below instead of returning early. */
				php_error_docref(NULL, E_WARNING, "Input string is too long");
			} else {
				TIDY_SET_DEFAULT_CONFIG(doc);

				tidyBufInit(&inbuf);
				/* inbuf only borrows the output layer's data. */
				tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);

				if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
					tidyBufInit(&outbuf);
					tidySaveBuffer(doc, &outbuf);
					FIX_BUFFER(&outbuf);
					/* Hand the buffer over to the output layer (out.free = 1). */
					output_context->out.data = (char *) outbuf.bp;
					output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
					output_context->out.free = 1;
					status = SUCCESS;
				}
			}
		}

		tidyRelease(doc);
		tidyBufFree(&errbuf);
	}

	return status;
}
983
984 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)985 PHP_FUNCTION(tidy_parse_string)
986 {
987 char *enc = NULL;
988 size_t enc_len = 0;
989 zend_string *input, *options_str = NULL;
990 HashTable *options_ht = NULL;
991 PHPTidyObj *obj;
992
993 ZEND_PARSE_PARAMETERS_START(1, 3)
994 Z_PARAM_STR(input)
995 Z_PARAM_OPTIONAL
996 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
997 Z_PARAM_STRING_OR_NULL(enc, enc_len)
998 ZEND_PARSE_PARAMETERS_END();
999
1000 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1001 zend_argument_value_error(1, "is too long");
1002 RETURN_THROWS();
1003 }
1004
1005 tidy_instanciate(tidy_ce_doc, return_value);
1006 obj = Z_TIDY_P(return_value);
1007
1008 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1009
1010 if (php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1011 zval_ptr_dtor(return_value);
1012 RETURN_FALSE;
1013 }
1014 }
1015 /* }}} */
1016
/* {{{ Return warnings and errors which occurred while parsing the specified document */
PHP_FUNCTION(tidy_get_error_buffer)1018 PHP_FUNCTION(tidy_get_error_buffer)
1019 {
1020 TIDY_FETCH_OBJECT;
1021
1022 if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1023 RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1024 } else {
1025 RETURN_FALSE;
1026 }
1027 }
1028 /* }}} */
1029
1030 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1031 PHP_FUNCTION(tidy_get_output)
1032 {
1033 TidyBuffer output;
1034 TIDY_FETCH_OBJECT;
1035
1036 tidyBufInit(&output);
1037 tidySaveBuffer(obj->ptdoc->doc, &output);
1038 FIX_BUFFER(&output);
1039 RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1040 tidyBufFree(&output);
1041 }
1042 /* }}} */
1043
1044 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1045 PHP_FUNCTION(tidy_parse_file)
1046 {
1047 char *enc = NULL;
1048 size_t enc_len = 0;
1049 bool use_include_path = 0;
1050 zend_string *inputfile, *contents, *options_str = NULL;
1051 HashTable *options_ht = NULL;
1052
1053 PHPTidyObj *obj;
1054
1055 ZEND_PARSE_PARAMETERS_START(1, 4)
1056 Z_PARAM_PATH_STR(inputfile)
1057 Z_PARAM_OPTIONAL
1058 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1059 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1060 Z_PARAM_BOOL(use_include_path)
1061 ZEND_PARSE_PARAMETERS_END();
1062
1063 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1064 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1065 RETURN_FALSE;
1066 }
1067
1068 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1069 zend_string_release_ex(contents, 0);
1070 zend_value_error("Input string is too long");
1071 RETURN_THROWS();
1072 }
1073
1074 tidy_instanciate(tidy_ce_doc, return_value);
1075 obj = Z_TIDY_P(return_value);
1076
1077 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1078
1079 if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1080 zval_ptr_dtor(return_value);
1081 RETVAL_FALSE;
1082 }
1083
1084 zend_string_release_ex(contents, 0);
1085 }
1086 /* }}} */
1087
1088 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1089 PHP_FUNCTION(tidy_clean_repair)
1090 {
1091 TIDY_FETCH_OBJECT;
1092
1093 if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1094 tidy_doc_update_properties(obj);
1095 RETURN_TRUE;
1096 }
1097
1098 RETURN_FALSE;
1099 }
1100 /* }}} */
1101
/* {{{ Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)
{
	/* All work is delegated to php_tidy_quick_repair(); the trailing `false`
	 * presumably selects string input rather than a file path (the helper is
	 * defined outside this chunk - confirm). */
	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
}
/* }}} */
1108
/* {{{ Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)
{
	/* All work is delegated to php_tidy_quick_repair(); the trailing `true`
	 * presumably marks the first argument as a file path (the helper is
	 * defined outside this chunk - confirm). */
	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
}
/* }}} */
1115
1116 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1117 PHP_FUNCTION(tidy_diagnose)
1118 {
1119 TIDY_FETCH_OBJECT;
1120
1121 if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1122 tidy_doc_update_properties(obj);
1123 RETURN_TRUE;
1124 }
1125
1126 RETURN_FALSE;
1127 }
1128 /* }}} */
1129
1130 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1131 PHP_FUNCTION(tidy_get_release)
1132 {
1133 if (zend_parse_parameters_none() == FAILURE) {
1134 RETURN_THROWS();
1135 }
1136
1137 #ifdef HAVE_TIDYRELEASEDATE
1138 RETURN_STRING((char *)tidyReleaseDate());
1139 #else
1140 RETURN_STRING((char *)"unknown");
1141 #endif
1142 }
1143 /* }}} */
1144
1145
#ifdef HAVE_TIDYOPTGETDOC
/* {{{ Returns the documentation for the given option name
 *
 * Throws a ValueError when the name does not resolve to a tidy option, and
 * returns false when libtidy has no documentation string for it. */
PHP_FUNCTION(tidy_get_opt_doc)
{
	zval *object;
	PHPTidyObj *obj;
	char *optname;
	size_t optname_len;
	TidyOption opt;
	char *doc_text;

	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
		RETURN_THROWS();
	}

	obj = Z_TIDY_P(object);
	opt = tidyGetOptionByName(obj->ptdoc->doc, optname);

	if (!opt) {
		/* The option name is argument 1 for a method call and argument 2 for
		 * a procedural call (where the tidy object comes first). */
		zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
		RETURN_THROWS();
	}

	doc_text = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt);
	if (doc_text == NULL) {
		RETURN_FALSE;
	}

	RETURN_STRING(doc_text);
}
/* }}} */
#endif
1177
1178
1179 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1180 PHP_FUNCTION(tidy_get_config)
1181 {
1182 TidyIterator itOpt;
1183 char *opt_name;
1184 void *opt_value;
1185 TidyOptionType optt;
1186
1187 TIDY_FETCH_OBJECT;
1188
1189 itOpt = tidyGetOptionList(obj->ptdoc->doc);
1190
1191 array_init(return_value);
1192
1193 while (itOpt) {
1194 TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1195
1196 opt_name = (char *)tidyOptGetName(opt);
1197 opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1198 switch (optt) {
1199 case TidyString:
1200 add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1201 break;
1202
1203 case TidyInteger:
1204 add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1205 break;
1206
1207 case TidyBoolean:
1208 add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1209 break;
1210 }
1211 }
1212
1213 return;
1214 }
1215 /* }}} */
1216
1217 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1218 PHP_FUNCTION(tidy_get_status)
1219 {
1220 TIDY_FETCH_OBJECT;
1221
1222 RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1223 }
1224 /* }}} */
1225
1226 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1227 PHP_FUNCTION(tidy_get_html_ver)
1228 {
1229 TIDY_FETCH_INITIALIZED_OBJECT;
1230
1231 RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1232 }
1233 /* }}} */
1234
1235 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1236 PHP_FUNCTION(tidy_is_xhtml)
1237 {
1238 TIDY_FETCH_INITIALIZED_OBJECT;
1239
1240 RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1241 }
1242 /* }}} */
1243
1244 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1245 PHP_FUNCTION(tidy_is_xml)
1246 {
1247 TIDY_FETCH_INITIALIZED_OBJECT;
1248
1249 RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1250 }
1251 /* }}} */
1252
1253 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1254 PHP_FUNCTION(tidy_error_count)
1255 {
1256 TIDY_FETCH_OBJECT;
1257
1258 RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1259 }
1260 /* }}} */
1261
1262 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1263 PHP_FUNCTION(tidy_warning_count)
1264 {
1265 TIDY_FETCH_OBJECT;
1266
1267 RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1268 }
1269 /* }}} */
1270
1271 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1272 PHP_FUNCTION(tidy_access_count)
1273 {
1274 TIDY_FETCH_OBJECT;
1275
1276 RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1277 }
1278 /* }}} */
1279
1280 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1281 PHP_FUNCTION(tidy_config_count)
1282 {
1283 TIDY_FETCH_OBJECT;
1284
1285 RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1286 }
1287 /* }}} */
1288
1289 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1290 PHP_FUNCTION(tidy_getopt)
1291 {
1292 PHPTidyObj *obj;
1293 char *optname;
1294 void *optval;
1295 size_t optname_len;
1296 TidyOption opt;
1297 TidyOptionType optt;
1298 zval *object;
1299
1300 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1301 RETURN_THROWS();
1302 }
1303
1304 obj = Z_TIDY_P(object);
1305
1306 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1307
1308 if (!opt) {
1309 zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1310 RETURN_THROWS();
1311 }
1312
1313 optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1314 switch (optt) {
1315 case TidyString:
1316 RETVAL_STR((zend_string*)optval);
1317 return;
1318
1319 case TidyInteger:
1320 RETURN_LONG((zend_long)optval);
1321 break;
1322
1323 case TidyBoolean:
1324 if (optval) {
1325 RETURN_TRUE;
1326 } else {
1327 RETURN_FALSE;
1328 }
1329 break;
1330
1331 default:
1332 php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1333 break;
1334 }
1335
1336 RETURN_FALSE;
1337 }
1338 /* }}} */
1339
PHP_METHOD(tidy,__construct)1340 PHP_METHOD(tidy, __construct)
1341 {
1342 char *enc = NULL;
1343 size_t enc_len = 0;
1344 bool use_include_path = 0;
1345 HashTable *options_ht = NULL;
1346 zend_string *contents, *inputfile = NULL, *options_str = NULL;
1347 PHPTidyObj *obj;
1348
1349 ZEND_PARSE_PARAMETERS_START(0, 4)
1350 Z_PARAM_OPTIONAL
1351 Z_PARAM_PATH_STR_OR_NULL(inputfile)
1352 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1353 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1354 Z_PARAM_BOOL(use_include_path)
1355 ZEND_PARSE_PARAMETERS_END();
1356
1357 TIDY_SET_CONTEXT;
1358 obj = Z_TIDY_P(object);
1359
1360 if (inputfile) {
1361 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1362 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1363 return;
1364 }
1365
1366 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1367 zend_string_release_ex(contents, 0);
1368 zend_value_error("Input string is too long");
1369 RETURN_THROWS();
1370 }
1371
1372 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1373
1374 php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1375
1376 zend_string_release_ex(contents, 0);
1377 }
1378 }
1379
PHP_METHOD(tidy,parseFile)1380 PHP_METHOD(tidy, parseFile)
1381 {
1382 char *enc = NULL;
1383 size_t enc_len = 0;
1384 bool use_include_path = 0;
1385 HashTable *options_ht = NULL;
1386 zend_string *inputfile, *contents, *options_str = NULL;
1387 PHPTidyObj *obj;
1388
1389 ZEND_PARSE_PARAMETERS_START(1, 4)
1390 Z_PARAM_PATH_STR(inputfile)
1391 Z_PARAM_OPTIONAL
1392 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1393 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1394 Z_PARAM_BOOL(use_include_path)
1395 ZEND_PARSE_PARAMETERS_END();
1396
1397 TIDY_SET_CONTEXT;
1398 obj = Z_TIDY_P(object);
1399
1400 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1401 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1402 RETURN_FALSE;
1403 }
1404
1405 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1406 zend_string_release_ex(contents, 0);
1407 zend_value_error("Input string is too long");
1408 RETURN_THROWS();
1409 }
1410
1411 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1412
1413 if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1414 RETVAL_FALSE;
1415 } else {
1416 RETVAL_TRUE;
1417 }
1418
1419 zend_string_release_ex(contents, 0);
1420 }
1421
PHP_METHOD(tidy,parseString)1422 PHP_METHOD(tidy, parseString)
1423 {
1424 char *enc = NULL;
1425 size_t enc_len = 0;
1426 HashTable *options_ht = NULL;
1427 PHPTidyObj *obj;
1428 zend_string *input, *options_str = NULL;
1429
1430 ZEND_PARSE_PARAMETERS_START(1, 3)
1431 Z_PARAM_STR(input)
1432 Z_PARAM_OPTIONAL
1433 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1434 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1435 ZEND_PARSE_PARAMETERS_END();
1436
1437 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1438 zend_argument_value_error(1, "is too long");
1439 RETURN_THROWS();
1440 }
1441
1442 TIDY_SET_CONTEXT;
1443 obj = Z_TIDY_P(object);
1444
1445 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1446
1447 if(php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1448 RETURN_TRUE;
1449 }
1450
1451 RETURN_FALSE;
1452 }
1453
1454
/* {{{ Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)
{
	/* Argument handling and node creation are delegated to
	 * php_tidy_create_node(), selecting the node with is_root_node. */
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
}
/* }}} */
1461
/* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)
{
	/* Argument handling and node creation are delegated to
	 * php_tidy_create_node(), selecting the node with is_html_node. */
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
}
/* }}} */
1468
/* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)
{
	/* Argument handling and node creation are delegated to
	 * php_tidy_create_node(), selecting the node with is_head_node. */
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
}
/* }}} */
1475
/* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)
{
	/* Argument handling and node creation are delegated to
	 * php_tidy_create_node(), selecting the node with is_body_node. */
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
}
/* }}} */
1482
1483 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1484 PHP_METHOD(tidyNode, hasChildren)
1485 {
1486 TIDY_FETCH_ONLY_OBJECT;
1487
1488 if (tidyGetChild(obj->node)) {
1489 RETURN_TRUE;
1490 } else {
1491 RETURN_FALSE;
1492 }
1493 }
1494 /* }}} */
1495
1496 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1497 PHP_METHOD(tidyNode, hasSiblings)
1498 {
1499 TIDY_FETCH_ONLY_OBJECT;
1500
1501 if (obj->node && tidyGetNext(obj->node)) {
1502 RETURN_TRUE;
1503 } else {
1504 RETURN_FALSE;
1505 }
1506 }
1507 /* }}} */
1508
1509 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1510 PHP_METHOD(tidyNode, isComment)
1511 {
1512 TIDY_FETCH_ONLY_OBJECT;
1513
1514 if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1515 RETURN_TRUE;
1516 } else {
1517 RETURN_FALSE;
1518 }
1519 }
1520 /* }}} */
1521
1522 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1523 PHP_METHOD(tidyNode, isHtml)
1524 {
1525 TIDY_FETCH_ONLY_OBJECT;
1526
1527 switch (tidyNodeGetType(obj->node)) {
1528 case TidyNode_Start:
1529 case TidyNode_End:
1530 case TidyNode_StartEnd:
1531 RETURN_TRUE;
1532 default:
1533 RETURN_FALSE;
1534 }
1535 }
1536 /* }}} */
1537
1538 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1539 PHP_METHOD(tidyNode, isText)
1540 {
1541 TIDY_FETCH_ONLY_OBJECT;
1542
1543 if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1544 RETURN_TRUE;
1545 } else {
1546 RETURN_FALSE;
1547 }
1548 }
1549 /* }}} */
1550
1551 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1552 PHP_METHOD(tidyNode, isJste)
1553 {
1554 TIDY_FETCH_ONLY_OBJECT;
1555
1556 if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1557 RETURN_TRUE;
1558 } else {
1559 RETURN_FALSE;
1560 }
1561 }
1562 /* }}} */
1563
1564 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1565 PHP_METHOD(tidyNode, isAsp)
1566 {
1567 TIDY_FETCH_ONLY_OBJECT;
1568
1569 if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1570 RETURN_TRUE;
1571 } else {
1572 RETURN_FALSE;
1573 }
1574 }
1575 /* }}} */
1576
1577 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1578 PHP_METHOD(tidyNode, isPhp)
1579 {
1580 TIDY_FETCH_ONLY_OBJECT;
1581
1582 if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1583 RETURN_TRUE;
1584 } else {
1585 RETURN_FALSE;
1586 }
1587 }
1588 /* }}} */
1589
1590 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1591 PHP_METHOD(tidyNode, getParent)
1592 {
1593 TidyNode parent_node;
1594 PHPTidyObj *newobj;
1595 TIDY_FETCH_ONLY_OBJECT;
1596
1597 parent_node = tidyGetParent(obj->node);
1598 if(parent_node) {
1599 tidy_instanciate(tidy_ce_node, return_value);
1600 newobj = Z_TIDY_P(return_value);
1601 newobj->node = parent_node;
1602 newobj->type = is_node;
1603 newobj->ptdoc = obj->ptdoc;
1604 newobj->ptdoc->ref_count++;
1605 tidy_add_default_properties(newobj, is_node);
1606 } else {
1607 ZVAL_NULL(return_value);
1608 }
1609 }
1610 /* }}} */
1611
1612
/* {{{ Constructor for tidyNode; always throws, because the nodes in this file
 * are created internally via tidy_instanciate(tidy_ce_node, ...) instead. */
PHP_METHOD(tidyNode, __construct)
{
	zend_throw_error(NULL, "You should not create a tidyNode manually");
}
/* }}} */
1619
/* Invokes TIDY_NODE_CONST once per libtidy node type. Each call pairs the
 * upper-case name used on the PHP side with the matching libtidy spelling.
 * The macro is defined outside this chunk; presumably it registers one
 * node-type constant per call - confirm against its definition. */
static void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
{
	TIDY_NODE_CONST(ROOT, Root);
	TIDY_NODE_CONST(DOCTYPE, DocType);
	TIDY_NODE_CONST(COMMENT, Comment);
	TIDY_NODE_CONST(PROCINS, ProcIns);
	TIDY_NODE_CONST(TEXT, Text);
	TIDY_NODE_CONST(START, Start);
	TIDY_NODE_CONST(END, End);
	TIDY_NODE_CONST(STARTEND, StartEnd);
	TIDY_NODE_CONST(CDATA, CDATA);
	TIDY_NODE_CONST(SECTION, Section);
	TIDY_NODE_CONST(ASP, Asp);
	TIDY_NODE_CONST(JSTE, Jste);
	TIDY_NODE_CONST(PHP, Php);
	TIDY_NODE_CONST(XMLDECL, XmlDecl);
}
1637
/* Invokes TIDY_TAG_CONST once per known tag name. The macro is defined outside
 * this chunk; presumably it registers one tag constant per call - confirm
 * against its definition. The second group of tags is only compiled in when
 * HAVE_TIDYBUFFIO_H is defined. */
static void _php_tidy_register_tags(INIT_FUNC_ARGS)
{
	TIDY_TAG_CONST(UNKNOWN);
	TIDY_TAG_CONST(A);
	TIDY_TAG_CONST(ABBR);
	TIDY_TAG_CONST(ACRONYM);
	TIDY_TAG_CONST(ADDRESS);
	TIDY_TAG_CONST(ALIGN);
	TIDY_TAG_CONST(APPLET);
	TIDY_TAG_CONST(AREA);
	TIDY_TAG_CONST(B);
	TIDY_TAG_CONST(BASE);
	TIDY_TAG_CONST(BASEFONT);
	TIDY_TAG_CONST(BDO);
	TIDY_TAG_CONST(BGSOUND);
	TIDY_TAG_CONST(BIG);
	TIDY_TAG_CONST(BLINK);
	TIDY_TAG_CONST(BLOCKQUOTE);
	TIDY_TAG_CONST(BODY);
	TIDY_TAG_CONST(BR);
	TIDY_TAG_CONST(BUTTON);
	TIDY_TAG_CONST(CAPTION);
	TIDY_TAG_CONST(CENTER);
	TIDY_TAG_CONST(CITE);
	TIDY_TAG_CONST(CODE);
	TIDY_TAG_CONST(COL);
	TIDY_TAG_CONST(COLGROUP);
	TIDY_TAG_CONST(COMMENT);
	TIDY_TAG_CONST(DD);
	TIDY_TAG_CONST(DEL);
	TIDY_TAG_CONST(DFN);
	TIDY_TAG_CONST(DIR);
	TIDY_TAG_CONST(DIV);
	TIDY_TAG_CONST(DL);
	TIDY_TAG_CONST(DT);
	TIDY_TAG_CONST(EM);
	TIDY_TAG_CONST(EMBED);
	TIDY_TAG_CONST(FIELDSET);
	TIDY_TAG_CONST(FONT);
	TIDY_TAG_CONST(FORM);
	TIDY_TAG_CONST(FRAME);
	TIDY_TAG_CONST(FRAMESET);
	TIDY_TAG_CONST(H1);
	TIDY_TAG_CONST(H2);
	TIDY_TAG_CONST(H3);
	TIDY_TAG_CONST(H4);
	TIDY_TAG_CONST(H5);
	TIDY_TAG_CONST(H6);
	TIDY_TAG_CONST(HEAD);
	TIDY_TAG_CONST(HR);
	TIDY_TAG_CONST(HTML);
	TIDY_TAG_CONST(I);
	TIDY_TAG_CONST(IFRAME);
	TIDY_TAG_CONST(ILAYER);
	TIDY_TAG_CONST(IMG);
	TIDY_TAG_CONST(INPUT);
	TIDY_TAG_CONST(INS);
	TIDY_TAG_CONST(ISINDEX);
	TIDY_TAG_CONST(KBD);
	TIDY_TAG_CONST(KEYGEN);
	TIDY_TAG_CONST(LABEL);
	TIDY_TAG_CONST(LAYER);
	TIDY_TAG_CONST(LEGEND);
	TIDY_TAG_CONST(LI);
	TIDY_TAG_CONST(LINK);
	TIDY_TAG_CONST(LISTING);
	TIDY_TAG_CONST(MAP);
	TIDY_TAG_CONST(MARQUEE);
	TIDY_TAG_CONST(MENU);
	TIDY_TAG_CONST(META);
	TIDY_TAG_CONST(MULTICOL);
	TIDY_TAG_CONST(NOBR);
	TIDY_TAG_CONST(NOEMBED);
	TIDY_TAG_CONST(NOFRAMES);
	TIDY_TAG_CONST(NOLAYER);
	TIDY_TAG_CONST(NOSAVE);
	TIDY_TAG_CONST(NOSCRIPT);
	TIDY_TAG_CONST(OBJECT);
	TIDY_TAG_CONST(OL);
	TIDY_TAG_CONST(OPTGROUP);
	TIDY_TAG_CONST(OPTION);
	TIDY_TAG_CONST(P);
	TIDY_TAG_CONST(PARAM);
	TIDY_TAG_CONST(PLAINTEXT);
	TIDY_TAG_CONST(PRE);
	TIDY_TAG_CONST(Q);
	TIDY_TAG_CONST(RB);
	TIDY_TAG_CONST(RBC);
	TIDY_TAG_CONST(RP);
	TIDY_TAG_CONST(RT);
	TIDY_TAG_CONST(RTC);
	TIDY_TAG_CONST(RUBY);
	TIDY_TAG_CONST(S);
	TIDY_TAG_CONST(SAMP);
	TIDY_TAG_CONST(SCRIPT);
	TIDY_TAG_CONST(SELECT);
	TIDY_TAG_CONST(SERVER);
	TIDY_TAG_CONST(SERVLET);
	TIDY_TAG_CONST(SMALL);
	TIDY_TAG_CONST(SPACER);
	TIDY_TAG_CONST(SPAN);
	TIDY_TAG_CONST(STRIKE);
	TIDY_TAG_CONST(STRONG);
	TIDY_TAG_CONST(STYLE);
	TIDY_TAG_CONST(SUB);
	TIDY_TAG_CONST(SUP);
	TIDY_TAG_CONST(TABLE);
	TIDY_TAG_CONST(TBODY);
	TIDY_TAG_CONST(TD);
	TIDY_TAG_CONST(TEXTAREA);
	TIDY_TAG_CONST(TFOOT);
	TIDY_TAG_CONST(TH);
	TIDY_TAG_CONST(THEAD);
	TIDY_TAG_CONST(TITLE);
	TIDY_TAG_CONST(TR);
	TIDY_TAG_CONST(TT);
	TIDY_TAG_CONST(U);
	TIDY_TAG_CONST(UL);
	TIDY_TAG_CONST(VAR);
	TIDY_TAG_CONST(WBR);
	TIDY_TAG_CONST(XMP);
	/* The tags below are registered only in builds where tidybuffio.h was
	 * found (the same condition that selects the include at the top of the
	 * file); presumably that identifies a libtidy new enough to know these
	 * tag ids - confirm. */
# ifdef HAVE_TIDYBUFFIO_H
	TIDY_TAG_CONST(ARTICLE);
	TIDY_TAG_CONST(ASIDE);
	TIDY_TAG_CONST(AUDIO);
	TIDY_TAG_CONST(BDI);
	TIDY_TAG_CONST(CANVAS);
	TIDY_TAG_CONST(COMMAND);
	TIDY_TAG_CONST(DATALIST);
	TIDY_TAG_CONST(DETAILS);
	TIDY_TAG_CONST(DIALOG);
	TIDY_TAG_CONST(FIGCAPTION);
	TIDY_TAG_CONST(FIGURE);
	TIDY_TAG_CONST(FOOTER);
	TIDY_TAG_CONST(HEADER);
	TIDY_TAG_CONST(HGROUP);
	TIDY_TAG_CONST(MAIN);
	TIDY_TAG_CONST(MARK);
	TIDY_TAG_CONST(MENUITEM);
	TIDY_TAG_CONST(METER);
	TIDY_TAG_CONST(NAV);
	TIDY_TAG_CONST(OUTPUT);
	TIDY_TAG_CONST(PROGRESS);
	TIDY_TAG_CONST(SECTION);
	TIDY_TAG_CONST(SOURCE);
	TIDY_TAG_CONST(SUMMARY);
	TIDY_TAG_CONST(TEMPLATE);
	TIDY_TAG_CONST(TIME);
	TIDY_TAG_CONST(TRACK);
	TIDY_TAG_CONST(VIDEO);
# endif
}
1790
1791 #endif
1792