1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: John Coggeshall <john@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include "php.h"
22 #include "php_tidy.h"
23
24 #ifdef HAVE_TIDY
25
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40
41 #include "tidy_arginfo.h"
42
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47
48 /* {{{ ext/tidy macros */
/* NUL-terminates a TidyBuffer in place by overwriting its last byte; does nothing when size is 0. */
#define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)

/* Declares `object` and points it at the zval returned by getThis(). */
#define TIDY_SET_CONTEXT \
	zval *object = getThis();

/*
 * Declares `obj` and `object`, parses exactly one argument of class
 * tidy_ce_doc with zend_parse_method_parameters() and stores the matching
 * PHPTidyObj in `obj`. Leaves the calling function through RETURN_THROWS()
 * when parsing fails.
 */
#define TIDY_FETCH_OBJECT \
	PHPTidyObj *obj;	\
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) == FAILURE) {	\
		RETURN_THROWS(); \
	}	\
	obj = Z_TIDY_P(object);	\

/*
 * Same as TIDY_FETCH_OBJECT, but additionally throws an Error and returns
 * from the calling function when the document's `initialized` flag is unset.
 */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}

/*
 * Declares `obj`, binds `object` through TIDY_SET_CONTEXT, rejects any passed
 * arguments (zend_parse_parameters_none()) and resolves `object` to `obj`.
 */
#define TIDY_FETCH_ONLY_OBJECT \
	PHPTidyObj *obj;	\
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() == FAILURE) {	\
		RETURN_THROWS(); \
	}	\
	obj = Z_TIDY_P(object);	\

/*
 * Applies user configuration to _doc: an options array (_val_ht) takes
 * precedence; otherwise _val_str is treated as a configuration file path.
 * NOTE: the file branch expands TIDY_OPEN_BASE_DIR_CHECK, which contains a
 * RETURN_FALSE - the calling function returns immediately, without running
 * any cleanup, when the open_basedir check fails.
 */
#define TIDY_APPLY_CONFIG(_doc, _val_str, _val_ht) \
	if (_val_ht) { \
		_php_tidy_apply_config_array(_doc, _val_ht); \
	} else if (_val_str) { \
		TIDY_OPEN_BASE_DIR_CHECK(ZSTR_VAL(_val_str)); \
		php_tidy_load_config(_doc, ZSTR_VAL(_val_str)); \
	}

/* Makes the calling function RETURN_FALSE when php_check_open_basedir() reports a non-zero result for filename. */
#define TIDY_OPEN_BASE_DIR_CHECK(filename) \
if (php_check_open_basedir(filename)) { \
	RETURN_FALSE; \
} \

/* Loads the file named by the tidy.default_config INI global into _doc when that setting is a non-empty string. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		php_tidy_load_config(_doc, TG(default_config)); \
	}
94 /* }}} */
95
96 /* {{{ ext/tidy structs */
typedef struct _PHPTidyDoc PHPTidyDoc;
typedef struct _PHPTidyObj PHPTidyObj;

/* Kind of PHP object a PHPTidyObj backs: a tidyNode or a tidy document. */
typedef enum {
	is_node,
	is_doc
} tidy_obj_type;

/* Selects which well-known node php_tidy_create_node() fetches from the document. */
typedef enum {
	is_root_node,
	is_html_node,
	is_head_node,
	is_body_node
} tidy_base_nodetypes;

/* libtidy document state shared between a tidy object and the node objects created from it. */
struct _PHPTidyDoc {
	TidyDoc			doc;              /* libtidy document handle */
	TidyBuffer		*errbuf;          /* heap-allocated buffer registered as the document's error sink */
	unsigned int	ref_count;        /* number of PHPTidyObj instances pointing at this struct */
	unsigned int    initialized:1;    /* set to 1 by php_tidy_parse_string() before parsing */
};

/* Per-object storage for both tidy and tidyNode instances. */
struct _PHPTidyObj {
	TidyNode		node;             /* node represented by this object (assigned for node objects) */
	tidy_obj_type	type;
	PHPTidyDoc		*ptdoc;           /* shared document state; checked for NULL before use in some paths */
	zend_object		std;              /* embedded Zend object; php_tidy_fetch_object() maps it back to the container */
};
125
php_tidy_fetch_object(zend_object * obj)126 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
127 return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
128 }
129
/* Resolves a zval holding an object to its PHPTidyObj via php_tidy_fetch_object(). */
#define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
131 /* }}} */
132
133 /* {{{ ext/tidy prototypes */
/* File loading and object lifecycle */
static zend_string *php_tidy_file_to_mem(char *, bool);
static void tidy_object_free_storage(zend_object *);
static zend_object *tidy_object_new_node(zend_class_entry *);
static zend_object *tidy_object_new_doc(zend_class_entry *);
static zval *tidy_instantiate(zend_class_entry *, zval *);
/* cast_object handlers for the tidy and tidyNode classes */
static zend_result tidy_doc_cast_handler(zend_object *, zval *, int);
static zend_result tidy_node_cast_handler(zend_object *, zval *, int);
/* Property population helpers */
static void tidy_doc_update_properties(PHPTidyObj *);
static void tidy_add_node_default_properties(PHPTidyObj *);
/* Option access and configuration helpers */
static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
/* tidy.clean_output INI handler and the ob_tidyhandler output handler */
static PHP_INI_MH(php_tidy_set_clean_output);
static void php_tidy_clean_output_start(const char *name, size_t name_len);
static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context);

/* Module lifecycle callbacks referenced from tidy_module_entry */
static PHP_MINIT_FUNCTION(tidy);
static PHP_MSHUTDOWN_FUNCTION(tidy);
static PHP_RINIT_FUNCTION(tidy);
static PHP_RSHUTDOWN_FUNCTION(tidy);
static PHP_MINFO_FUNCTION(tidy);
157
/* Module globals; accessed through TG() elsewhere in this file. */
ZEND_DECLARE_MODULE_GLOBALS(tidy)

/*
 * INI settings:
 *  - tidy.default_config: stored in the default_config global; read by TIDY_SET_DEFAULT_CONFIG.
 *  - tidy.clean_output:   stored in the clean_output global; changes go through php_tidy_set_clean_output.
 */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("tidy.default_config",	"",		PHP_INI_SYSTEM,		OnUpdateString,				default_config,		zend_tidy_globals,	tidy_globals)
STD_PHP_INI_BOOLEAN("tidy.clean_output",	"0",	PHP_INI_USER,		php_tidy_set_clean_output,	clean_output,		zend_tidy_globals,	tidy_globals)
PHP_INI_END()

/* Class entries for the document and node classes; assigned in MINIT. */
static zend_class_entry *tidy_ce_doc, *tidy_ce_node;

/* Object handler tables for the two classes; filled in during MINIT. */
static zend_object_handlers tidy_object_handlers_doc;
static zend_object_handlers tidy_object_handlers_node;
169
/* Module descriptor: wires the function table, lifecycle callbacks, version and globals. */
zend_module_entry tidy_module_entry = {
	STANDARD_MODULE_HEADER,
	"tidy",
	ext_functions,
	PHP_MINIT(tidy),
	PHP_MSHUTDOWN(tidy),
	PHP_RINIT(tidy),
	PHP_RSHUTDOWN(tidy),
	PHP_MINFO(tidy),
	PHP_TIDY_VERSION,
	PHP_MODULE_GLOBALS(tidy),
	NULL,
	NULL,
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};

/* Only emitted when COMPILE_DL_TIDY is defined; the TSRMLS cache is defined only under ZTS. */
#ifdef COMPILE_DL_TIDY
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(tidy)
#endif
193
194 static void* TIDY_CALL php_tidy_malloc(size_t len)
195 {
196 return emalloc(len);
197 }
198
php_tidy_realloc(void * buf,size_t len)199 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
200 {
201 return erealloc(buf, len);
202 }
203
php_tidy_free(void * buf)204 static void TIDY_CALL php_tidy_free(void *buf)
205 {
206 efree(buf);
207 }
208
php_tidy_panic(ctmbstr msg)209 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
210 {
211 php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
212 }
213
/*
 * Loads a tidy configuration file into doc.
 * A negative tidyLoadConfig() result raises an E_WARNING, a positive one an
 * E_NOTICE; zero is silent.
 */
static void php_tidy_load_config(TidyDoc doc, const char *path)
{
	const int status = tidyLoadConfig(doc, path);

	if (status == 0) {
		return;
	}

	if (status < 0) {
		php_error_docref(NULL, E_WARNING, "Could not load the Tidy configuration file \"%s\"", path);
	} else {
		php_error_docref(NULL, E_NOTICE, "There were errors while parsing the Tidy configuration file \"%s\"", path);
	}
}
223
/*
 * Sets a single tidy option on doc from a PHP value.
 *
 * optname is looked up by name; unknown and read-only options raise an
 * E_WARNING. The value is converted to a string or integer according to the
 * option's type before being passed to libtidy.
 *
 * Returns SUCCESS when libtidy accepts the value, FAILURE otherwise.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	int result = FAILURE;
	TidyOption option = tidyGetOptionByName(doc, optname);

	if (!option) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *tmp_holder;
			zend_string *text = zval_get_tmp_string(value, &tmp_holder);

			if (tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text))) {
				result = SUCCESS;
			}
			/* The temporary string is released on both outcomes. */
			zend_tmp_string_release(tmp_holder);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				result = SUCCESS;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				result = SUCCESS;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return result;
}
271
/*
 * Shared implementation of tidy_repair_string() and tidy_repair_file().
 *
 * Parses the arguments (input or path, optional config array / config file
 * path, optional encoding, and for files an optional use_include_path flag),
 * runs parse + clean-and-repair on a throw-away TidyDoc and sets the return
 * value to the repaired markup, or to false on failure.
 *
 * Every exit after the TidyDoc has been created goes through the `cleanup`
 * label so that the document, its error buffer and (for files) the loaded
 * contents are always released. In particular the open_basedir rejection of
 * a config file path no longer returns straight out of the function.
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	TidyBuffer buf;
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;

	if (is_file) {
		bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		data = arg1;
	}

	/* The length is later narrowed to uint32_t for tidyBufAttach(). */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (is_file) {
			/* In file mode `data` is owned by this function. */
			zend_string_release_ex(data, 0);
		}
		zend_argument_value_error(1, "is too long");
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		if (is_file) {
			zend_string_release_ex(data, 0);
		}
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
		/* Everything was released above; never continue with freed state
		 * should the fatal error report return to us. */
		RETURN_FALSE;
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/* Expanded form of TIDY_APPLY_CONFIG: that macro embeds a RETURN_FALSE,
	 * which would skip the cleanup below. */
	if (config_ht) {
		_php_tidy_apply_config_array(doc, config_ht);
	} else if (config_str) {
		if (php_check_open_basedir(ZSTR_VAL(config_str))) {
			RETVAL_FALSE;
			goto cleanup;
		}
		php_tidy_load_config(doc, ZSTR_VAL(config_str));
	}

	if (enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
			/* NOTE(review): processing continues and the parse below always
			 * overwrites this value, so callers currently still receive the
			 * repaired output. Kept as-is for compatibility - confirm intent. */
			RETVAL_FALSE;
		}
	}

	/* `data` is non-NULL here: its length was already read above. */
	tidyBufInit(&buf);
	tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

	if (tidyParseBuffer(doc, &buf) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
		RETVAL_FALSE;
	} else if (tidyCleanAndRepair(doc) >= 0) {
		TidyBuffer output;
		tidyBufInit(&output);

		tidySaveBuffer(doc, &output);
		FIX_BUFFER(&output);
		RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
		tidyBufFree(&output);
	} else {
		RETVAL_FALSE;
	}

cleanup:
	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
368
php_tidy_file_to_mem(char * filename,bool use_include_path)369 static zend_string *php_tidy_file_to_mem(char *filename, bool use_include_path)
370 {
371 php_stream *stream;
372 zend_string *data = NULL;
373
374 if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
375 return NULL;
376 }
377 if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
378 data = ZSTR_EMPTY_ALLOC();
379 }
380 php_stream_close(stream);
381
382 return data;
383 }
384
/*
 * free_obj handler for tidy and tidyNode objects.
 *
 * Destroys the standard object part, then drops this object's reference on
 * the shared PHPTidyDoc; the last holder frees the error buffer, the libtidy
 * document and the PHPTidyDoc itself.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&intern->std);

	shared = intern->ptdoc;
	if (!shared) {
		return;
	}

	shared->ref_count--;

	/* ref_count is unsigned, so "no holders left" means exactly zero. */
	if (shared->ref_count != 0) {
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
402
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)403 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
404 {
405 PHPTidyObj *intern;
406
407 intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
408 zend_object_std_init(&intern->std, class_type);
409 object_properties_init(&intern->std, class_type);
410
411 switch(objtype) {
412 case is_node:
413 break;
414
415 case is_doc:
416 intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
417 intern->ptdoc->doc = tidyCreate();
418 intern->ptdoc->ref_count = 1;
419 intern->ptdoc->initialized = 0;
420 intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
421 tidyBufInit(intern->ptdoc->errbuf);
422
423 if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
424 tidyBufFree(intern->ptdoc->errbuf);
425 efree(intern->ptdoc->errbuf);
426 tidyRelease(intern->ptdoc->doc);
427 efree(intern->ptdoc);
428 efree(intern);
429 php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
430 }
431
432 tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
433 tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
434
435 TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
436 break;
437 }
438
439 intern->std.handlers = handlers;
440
441 return &intern->std;
442 }
443
tidy_object_new_node(zend_class_entry * class_type)444 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
445 {
446 return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
447 }
448
tidy_object_new_doc(zend_class_entry * class_type)449 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
450 {
451 return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
452 }
453
/* Initialises `object` as a new instance of class pce and hands the same zval back. */
static zval *tidy_instantiate(zend_class_entry *pce, zval *object)
{
	zval *instance = object;

	object_init_ex(instance, pce);

	return instance;
}
459
tidy_doc_cast_handler(zend_object * in,zval * out,int type)460 static zend_result tidy_doc_cast_handler(zend_object *in, zval *out, int type)
461 {
462 TidyBuffer output;
463 PHPTidyObj *obj;
464
465 switch (type) {
466 case IS_LONG:
467 case _IS_NUMBER:
468 ZVAL_LONG(out, 0);
469 break;
470
471 case IS_DOUBLE:
472 ZVAL_DOUBLE(out, 0);
473 break;
474
475 case _IS_BOOL:
476 ZVAL_TRUE(out);
477 break;
478
479 case IS_STRING:
480 obj = php_tidy_fetch_object(in);
481 tidyBufInit(&output);
482 tidySaveBuffer (obj->ptdoc->doc, &output);
483 if (output.size) {
484 ZVAL_STRINGL(out, (char *) output.bp, output.size-1);
485 } else {
486 ZVAL_EMPTY_STRING(out);
487 }
488 tidyBufFree(&output);
489 break;
490
491 default:
492 return FAILURE;
493 }
494
495 return SUCCESS;
496 }
497
tidy_node_cast_handler(zend_object * in,zval * out,int type)498 static zend_result tidy_node_cast_handler(zend_object *in, zval *out, int type)
499 {
500 TidyBuffer buf;
501 PHPTidyObj *obj;
502
503 switch(type) {
504 case IS_LONG:
505 case _IS_NUMBER:
506 ZVAL_LONG(out, 0);
507 break;
508
509 case IS_DOUBLE:
510 ZVAL_DOUBLE(out, 0);
511 break;
512
513 case _IS_BOOL:
514 ZVAL_TRUE(out);
515 break;
516
517 case IS_STRING:
518 obj = php_tidy_fetch_object(in);
519 tidyBufInit(&buf);
520 if (obj->ptdoc) {
521 tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
522 ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
523 } else {
524 ZVAL_EMPTY_STRING(out);
525 }
526 tidyBufFree(&buf);
527 break;
528
529 default:
530 return FAILURE;
531 }
532
533 return SUCCESS;
534 }
535
/*
 * Refreshes the "value" and "errorBuffer" properties of a tidy document
 * object from the current libtidy state. A property is only written when the
 * corresponding buffer is non-empty; the buffer's last byte is left out.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	zend_object *self = &obj->std;
	TidyBuffer *errors;
	TidyBuffer markup;

	tidyBufInit(&markup);
	tidySaveBuffer(obj->ptdoc->doc, &markup);

	if (markup.size) {
		zend_update_property_stringl(tidy_ce_doc, self, "value", sizeof("value") - 1,
			(char*) markup.bp, markup.size-1);
	}

	tidyBufFree(&markup);

	errors = obj->ptdoc->errbuf;
	if (errors->size) {
		zend_update_property_stringl(tidy_ce_doc, self, "errorBuffer", sizeof("errorBuffer") - 1,
			(char*) errors->bp, errors->size-1);
	}
}
567
/*
 * Populates the properties of a tidyNode object from its libtidy node:
 * value, name, type, line, column, proprietary, id, attribute and child.
 *
 * "attribute" is an array of name => value pairs (or null when the node has
 * no attributes). "child" is an array of tidyNode objects built recursively
 * through this same function (or null when there are no children); every
 * child takes its own reference on the shared PHPTidyDoc.
 */
static void tidy_add_node_default_properties(PHPTidyObj *obj)
{
	zend_object *self = &obj->std;
	TidyBuffer text;
	TidyAttr attr;
	TidyNode kid;
	zval attributes, kids;
	char *node_name;

	/* value: textual form of the node, empty string when nothing was produced */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);

	zend_update_property_stringl(tidy_ce_node, self, "value", sizeof("value") - 1,
		text.size ? (char *) text.bp : "",
		text.size ? text.size - 1 : 0);

	tidyBufFree(&text);

	/* name: NULL names are exposed as an empty string */
	node_name = (char *) tidyNodeGetName(obj->node);

	zend_update_property_string(tidy_ce_node, self, "name", sizeof("name") - 1,
		node_name ? node_name : "");

	zend_update_property_long(tidy_ce_node, self, "type", sizeof("type") - 1,
		tidyNodeGetType(obj->node));

	zend_update_property_long(tidy_ce_node, self, "line", sizeof("line") - 1,
		tidyNodeLine(obj->node));

	zend_update_property_long(tidy_ce_node, self, "column", sizeof("column") - 1,
		tidyNodeColumn(obj->node));

	zend_update_property_bool(tidy_ce_node, self, "proprietary", sizeof("proprietary") - 1,
		tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	/* id: null for root, doctype, text and comment nodes; the tag id otherwise */
	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			zend_update_property_null(tidy_ce_node, self, "id", sizeof("id") - 1);
			break;

		default:
			zend_update_property_long(tidy_ce_node, self, "id", sizeof("id") - 1,
				tidyNodeGetId(obj->node));
	}

	/* attribute: name => value map, attributes without a name are skipped */
	attr = tidyAttrFirst(obj->node);

	if (!attr) {
		ZVAL_NULL(&attributes);
	} else {
		array_init(&attributes);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *) tidyAttrName(attr);
			char *attr_value = (char *) tidyAttrValue(attr);

			if (!attr_name) {
				continue;
			}

			if (attr_value) {
				add_assoc_string(&attributes, attr_name, attr_value);
			} else {
				add_assoc_str(&attributes, attr_name, zend_empty_string);
			}
		}
	}

	zend_update_property(tidy_ce_node, self, "attribute", sizeof("attribute") - 1, &attributes);

	zval_ptr_dtor(&attributes);

	/* child: one tidyNode object per child node, filled in recursively */
	kid = tidyGetChild(obj->node);

	if (!kid) {
		ZVAL_NULL(&kids);
	} else {
		array_init(&kids);

		while (kid) {
			zval wrapper;
			PHPTidyObj *child;

			tidy_instantiate(tidy_ce_node, &wrapper);
			child = Z_TIDY_P(&wrapper);
			child->node = kid;
			child->type = is_node;
			child->ptdoc = obj->ptdoc;
			child->ptdoc->ref_count++;

			tidy_add_node_default_properties(child);
			add_next_index_zval(&kids, &wrapper);

			kid = tidyGetNext(kid);
		}
	}

	zend_update_property(tidy_ce_node, self, "child", sizeof("child") - 1, &kids);

	zval_ptr_dtor(&kids);
}
718
/*
 * Fetches the current value of `opt` and reports its type through *type.
 *
 * The result is type-punned into a void pointer:
 *  - TidyString:  a new zend_string (empty when libtidy returns NULL),
 *  - TidyInteger: the integer value cast to a pointer,
 *  - TidyBoolean: the boolean value cast to a pointer.
 * Any other type yields NULL.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	TidyOptionType kind = tidyOptGetType(opt);

	*type = kind;

	if (kind == TidyString) {
		char *text = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));

		if (!text) {
			return (void *) ZSTR_EMPTY_ALLOC();
		}
		return (void *) zend_string_init(text, strlen(text), 0);
	}

	if (kind == TidyInteger) {
		return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
	}

	if (kind == TidyBoolean) {
		return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
746
/*
 * Shared body for the functions that return a well-known node of a document.
 *
 * node_type selects root, html, head or body. Returns null when libtidy has
 * no such node; otherwise return_value becomes a tidyNode object that shares
 * (and takes a reference on) the document's PHPTidyDoc and has its default
 * properties filled in.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *created;
	TidyNode found;
	TIDY_FETCH_OBJECT;

	switch (node_type) {
		case is_root_node:
			found = tidyGetRoot(obj->ptdoc->doc);
			break;

		case is_html_node:
			found = tidyGetHtml(obj->ptdoc->doc);
			break;

		case is_head_node:
			found = tidyGetHead(obj->ptdoc->doc);
			break;

		case is_body_node:
			found = tidyGetBody(obj->ptdoc->doc);
			break;

		EMPTY_SWITCH_DEFAULT_CASE()
	}

	if (!found) {
		RETURN_NULL();
	}

	tidy_instantiate(tidy_ce_node, return_value);

	created = Z_TIDY_P(return_value);
	created->type  = is_node;
	created->ptdoc = obj->ptdoc;
	created->node  = found;
	/* The node object keeps the shared document alive. */
	created->ptdoc->ref_count++;

	tidy_add_node_default_properties(created);
}
786
/*
 * Applies every string-keyed entry of ht_options to doc through
 * _php_tidy_set_tidy_opt(). Packed arrays and entries without a string key
 * are ignored, and individual option failures do not affect the result.
 *
 * Always returns SUCCESS.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zend_string *key;
	zval *entry;

	if (HT_IS_PACKED(ht_options)) {
		return SUCCESS;
	}

	ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(ht_options, key, entry) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), entry);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
802
/*
 * Parses `len` bytes of `string` into obj's document.
 *
 * When enc is non-NULL it is applied first; a rejected encoding raises an
 * E_WARNING and returns FAILURE before the document is marked initialized.
 * A parse failure reports the error buffer as an E_WARNING and returns
 * FAILURE. On success the object's properties are refreshed.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	PHPTidyDoc *ptdoc = obj->ptdoc;
	TidyBuffer input;

	if (enc && tidySetCharEncoding(ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	ptdoc->initialized = 1;

	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(ptdoc->doc, &input) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", ptdoc->errbuf->bp);
		return FAILURE;
	}

	tidy_doc_update_properties(obj);

	return SUCCESS;
}
826
PHP_MINIT_FUNCTION(tidy)827 static PHP_MINIT_FUNCTION(tidy)
828 {
829 tidySetMallocCall(php_tidy_malloc);
830 tidySetReallocCall(php_tidy_realloc);
831 tidySetFreeCall(php_tidy_free);
832 tidySetPanicCall(php_tidy_panic);
833
834 REGISTER_INI_ENTRIES();
835
836 tidy_ce_doc = register_class_tidy();
837 tidy_ce_doc->create_object = tidy_object_new_doc;
838 memcpy(&tidy_object_handlers_doc, &std_object_handlers, sizeof(zend_object_handlers));
839 tidy_object_handlers_doc.clone_obj = NULL;
840
841 tidy_ce_node = register_class_tidyNode();
842 tidy_ce_node->create_object = tidy_object_new_node;
843 memcpy(&tidy_object_handlers_node, &std_object_handlers, sizeof(zend_object_handlers));
844 tidy_object_handlers_node.clone_obj = NULL;
845
846 tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
847 tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
848
849 tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
850 tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
851
852 register_tidy_symbols(module_number);
853
854 php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
855
856 return SUCCESS;
857 }
858
/*
 * Request startup: refreshes the TSRMLS cache in ZTS shared builds, then
 * attempts to start the "ob_tidyhandler" output handler (which only happens
 * when tidy.clean_output is enabled, see php_tidy_clean_output_start()).
 */
static PHP_RINIT_FUNCTION(tidy)
{
#if defined(COMPILE_DL_TIDY) && defined(ZTS)
	ZEND_TSRMLS_CACHE_UPDATE();
#endif

	php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));

	return SUCCESS;
}
869
PHP_RSHUTDOWN_FUNCTION(tidy)870 static PHP_RSHUTDOWN_FUNCTION(tidy)
871 {
872 TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
873
874 return SUCCESS;
875 }
876
/* Module shutdown: unregisters the INI entries registered in MINIT. */
static PHP_MSHUTDOWN_FUNCTION(tidy)
{
	UNREGISTER_INI_ENTRIES();
	return SUCCESS;
}
882
/*
 * phpinfo() section: prints a support row, the library version (which call
 * is used depends on the header the build found), the release date when
 * HAVE_TIDYRELEASEDATE is defined, and finally the INI entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");
#ifdef HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
#elif defined(HAVE_TIDYP_H)
	php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
#endif
#ifdef HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
#endif
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
899
PHP_INI_MH(php_tidy_set_clean_output)900 static PHP_INI_MH(php_tidy_set_clean_output)
901 {
902 int status;
903 bool value;
904
905 value = zend_ini_parse_bool(new_value);
906
907 if (stage == PHP_INI_STAGE_RUNTIME) {
908 status = php_output_get_status();
909
910 if (value && (status & PHP_OUTPUT_WRITTEN)) {
911 php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
912 return FAILURE;
913 }
914 if (status & PHP_OUTPUT_SENT) {
915 php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
916 return FAILURE;
917 }
918 }
919
920 status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
921
922 if (stage == PHP_INI_STAGE_RUNTIME && value) {
923 if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
924 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
925 }
926 }
927
928 return status;
929 }
930
931 /*
932 * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
933 */
934
php_tidy_clean_output_start(const char * name,size_t name_len)935 static void php_tidy_clean_output_start(const char *name, size_t name_len)
936 {
937 php_output_handler *h;
938
939 if (TG(clean_output) && (h = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS))) {
940 php_output_handler_start(h);
941 }
942 }
943
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)944 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
945 {
946 if (chunk_size) {
947 php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
948 return NULL;
949 }
950 if (!TG(clean_output)) {
951 TG(clean_output) = 1;
952 }
953 return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
954 }
955
php_tidy_output_handler(void ** nothing,php_output_context * output_context)956 static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context)
957 {
958 zend_result status = FAILURE;
959 TidyDoc doc;
960 TidyBuffer inbuf, outbuf, errbuf;
961
962 if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
963 doc = tidyCreate();
964 tidyBufInit(&errbuf);
965
966 if (0 == tidySetErrorBuffer(doc, &errbuf)) {
967 tidyOptSetBool(doc, TidyForceOutput, yes);
968 tidyOptSetBool(doc, TidyMark, no);
969
970 if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
971 php_error_docref(NULL, E_WARNING, "Input string is too long");
972 return status;
973 }
974
975 TIDY_SET_DEFAULT_CONFIG(doc);
976
977 tidyBufInit(&inbuf);
978 tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);
979
980 if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
981 tidyBufInit(&outbuf);
982 tidySaveBuffer(doc, &outbuf);
983 FIX_BUFFER(&outbuf);
984 output_context->out.data = (char *) outbuf.bp;
985 output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
986 output_context->out.free = 1;
987 status = SUCCESS;
988 }
989 }
990
991 tidyRelease(doc);
992 tidyBufFree(&errbuf);
993 }
994
995 return status;
996 }
997
998 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)999 PHP_FUNCTION(tidy_parse_string)
1000 {
1001 char *enc = NULL;
1002 size_t enc_len = 0;
1003 zend_string *input, *options_str = NULL;
1004 HashTable *options_ht = NULL;
1005 PHPTidyObj *obj;
1006
1007 ZEND_PARSE_PARAMETERS_START(1, 3)
1008 Z_PARAM_STR(input)
1009 Z_PARAM_OPTIONAL
1010 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1011 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1012 ZEND_PARSE_PARAMETERS_END();
1013
1014 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1015 zend_argument_value_error(1, "is too long");
1016 RETURN_THROWS();
1017 }
1018
1019 tidy_instantiate(tidy_ce_doc, return_value);
1020 obj = Z_TIDY_P(return_value);
1021
1022 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1023
1024 if (php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1025 zval_ptr_dtor(return_value);
1026 RETURN_FALSE;
1027 }
1028 }
1029 /* }}} */
1030
/* {{{ Return warnings and errors which occurred while parsing the specified document */
PHP_FUNCTION(tidy_get_error_buffer)1032 PHP_FUNCTION(tidy_get_error_buffer)
1033 {
1034 TIDY_FETCH_OBJECT;
1035
1036 if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1037 RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1038 } else {
1039 RETURN_FALSE;
1040 }
1041 }
1042 /* }}} */
1043
1044 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1045 PHP_FUNCTION(tidy_get_output)
1046 {
1047 TidyBuffer output;
1048 TIDY_FETCH_OBJECT;
1049
1050 tidyBufInit(&output);
1051 tidySaveBuffer(obj->ptdoc->doc, &output);
1052 FIX_BUFFER(&output);
1053 RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1054 tidyBufFree(&output);
1055 }
1056 /* }}} */
1057
1058 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1059 PHP_FUNCTION(tidy_parse_file)
1060 {
1061 char *enc = NULL;
1062 size_t enc_len = 0;
1063 bool use_include_path = 0;
1064 zend_string *inputfile, *contents, *options_str = NULL;
1065 HashTable *options_ht = NULL;
1066
1067 PHPTidyObj *obj;
1068
1069 ZEND_PARSE_PARAMETERS_START(1, 4)
1070 Z_PARAM_PATH_STR(inputfile)
1071 Z_PARAM_OPTIONAL
1072 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1073 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1074 Z_PARAM_BOOL(use_include_path)
1075 ZEND_PARSE_PARAMETERS_END();
1076
1077 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1078 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1079 RETURN_FALSE;
1080 }
1081
1082 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1083 zend_string_release_ex(contents, 0);
1084 zend_value_error("Input string is too long");
1085 RETURN_THROWS();
1086 }
1087
1088 tidy_instantiate(tidy_ce_doc, return_value);
1089 obj = Z_TIDY_P(return_value);
1090
1091 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1092
1093 if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1094 zval_ptr_dtor(return_value);
1095 RETVAL_FALSE;
1096 }
1097
1098 zend_string_release_ex(contents, 0);
1099 }
1100 /* }}} */
1101
1102 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1103 PHP_FUNCTION(tidy_clean_repair)
1104 {
1105 TIDY_FETCH_OBJECT;
1106
1107 if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1108 tidy_doc_update_properties(obj);
1109 RETURN_TRUE;
1110 }
1111
1112 RETURN_FALSE;
1113 }
1114 /* }}} */
1115
1116 /* {{{ Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1117 PHP_FUNCTION(tidy_repair_string)
1118 {
1119 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1120 }
1121 /* }}} */
1122
1123 /* {{{ Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1124 PHP_FUNCTION(tidy_repair_file)
1125 {
1126 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1127 }
1128 /* }}} */
1129
1130 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1131 PHP_FUNCTION(tidy_diagnose)
1132 {
1133 TIDY_FETCH_OBJECT;
1134
1135 if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1136 tidy_doc_update_properties(obj);
1137 RETURN_TRUE;
1138 }
1139
1140 RETURN_FALSE;
1141 }
1142 /* }}} */
1143
1144 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1145 PHP_FUNCTION(tidy_get_release)
1146 {
1147 if (zend_parse_parameters_none() == FAILURE) {
1148 RETURN_THROWS();
1149 }
1150
1151 #ifdef HAVE_TIDYRELEASEDATE
1152 RETURN_STRING((char *)tidyReleaseDate());
1153 #else
1154 RETURN_STRING((char *)"unknown");
1155 #endif
1156 }
1157 /* }}} */
1158
1159
1160 #ifdef HAVE_TIDYOPTGETDOC
1161 /* {{{ Returns the documentation for the given option name */
PHP_FUNCTION(tidy_get_opt_doc)1162 PHP_FUNCTION(tidy_get_opt_doc)
1163 {
1164 PHPTidyObj *obj;
1165 char *optval, *optname;
1166 size_t optname_len;
1167 TidyOption opt;
1168 zval *object;
1169
1170 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1171 RETURN_THROWS();
1172 }
1173
1174 obj = Z_TIDY_P(object);
1175
1176 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1177
1178 if (!opt) {
1179 zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1180 RETURN_THROWS();
1181 }
1182
1183 if ( (optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt)) ) {
1184 RETURN_STRING(optval);
1185 }
1186
1187 RETURN_FALSE;
1188 }
1189 /* }}} */
1190 #endif
1191
1192
1193 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1194 PHP_FUNCTION(tidy_get_config)
1195 {
1196 TidyIterator itOpt;
1197 char *opt_name;
1198 void *opt_value;
1199 TidyOptionType optt;
1200
1201 TIDY_FETCH_OBJECT;
1202
1203 itOpt = tidyGetOptionList(obj->ptdoc->doc);
1204
1205 array_init(return_value);
1206
1207 while (itOpt) {
1208 TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1209
1210 opt_name = (char *)tidyOptGetName(opt);
1211 opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1212 switch (optt) {
1213 case TidyString:
1214 add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1215 break;
1216
1217 case TidyInteger:
1218 add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1219 break;
1220
1221 case TidyBoolean:
1222 add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1223 break;
1224 }
1225 }
1226
1227 return;
1228 }
1229 /* }}} */
1230
1231 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1232 PHP_FUNCTION(tidy_get_status)
1233 {
1234 TIDY_FETCH_OBJECT;
1235
1236 RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1237 }
1238 /* }}} */
1239
1240 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1241 PHP_FUNCTION(tidy_get_html_ver)
1242 {
1243 TIDY_FETCH_INITIALIZED_OBJECT;
1244
1245 RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1246 }
1247 /* }}} */
1248
1249 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1250 PHP_FUNCTION(tidy_is_xhtml)
1251 {
1252 TIDY_FETCH_INITIALIZED_OBJECT;
1253
1254 RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1255 }
1256 /* }}} */
1257
1258 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1259 PHP_FUNCTION(tidy_is_xml)
1260 {
1261 TIDY_FETCH_INITIALIZED_OBJECT;
1262
1263 RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1264 }
1265 /* }}} */
1266
1267 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1268 PHP_FUNCTION(tidy_error_count)
1269 {
1270 TIDY_FETCH_OBJECT;
1271
1272 RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1273 }
1274 /* }}} */
1275
1276 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1277 PHP_FUNCTION(tidy_warning_count)
1278 {
1279 TIDY_FETCH_OBJECT;
1280
1281 RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1282 }
1283 /* }}} */
1284
1285 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1286 PHP_FUNCTION(tidy_access_count)
1287 {
1288 TIDY_FETCH_OBJECT;
1289
1290 RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1291 }
1292 /* }}} */
1293
1294 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1295 PHP_FUNCTION(tidy_config_count)
1296 {
1297 TIDY_FETCH_OBJECT;
1298
1299 RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1300 }
1301 /* }}} */
1302
1303 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1304 PHP_FUNCTION(tidy_getopt)
1305 {
1306 PHPTidyObj *obj;
1307 char *optname;
1308 void *optval;
1309 size_t optname_len;
1310 TidyOption opt;
1311 TidyOptionType optt;
1312 zval *object;
1313
1314 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1315 RETURN_THROWS();
1316 }
1317
1318 obj = Z_TIDY_P(object);
1319
1320 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1321
1322 if (!opt) {
1323 zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1324 RETURN_THROWS();
1325 }
1326
1327 optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1328 switch (optt) {
1329 case TidyString:
1330 RETVAL_STR((zend_string*)optval);
1331 return;
1332
1333 case TidyInteger:
1334 RETURN_LONG((zend_long)optval);
1335 break;
1336
1337 case TidyBoolean:
1338 if (optval) {
1339 RETURN_TRUE;
1340 } else {
1341 RETURN_FALSE;
1342 }
1343 break;
1344
1345 default:
1346 php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1347 break;
1348 }
1349
1350 RETURN_FALSE;
1351 }
1352 /* }}} */
1353
PHP_METHOD(tidy,__construct)1354 PHP_METHOD(tidy, __construct)
1355 {
1356 char *enc = NULL;
1357 size_t enc_len = 0;
1358 bool use_include_path = 0;
1359 HashTable *options_ht = NULL;
1360 zend_string *contents, *inputfile = NULL, *options_str = NULL;
1361 PHPTidyObj *obj;
1362
1363 ZEND_PARSE_PARAMETERS_START(0, 4)
1364 Z_PARAM_OPTIONAL
1365 Z_PARAM_PATH_STR_OR_NULL(inputfile)
1366 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1367 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1368 Z_PARAM_BOOL(use_include_path)
1369 ZEND_PARSE_PARAMETERS_END();
1370
1371 TIDY_SET_CONTEXT;
1372 obj = Z_TIDY_P(object);
1373
1374 if (inputfile) {
1375 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1376 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1377 return;
1378 }
1379
1380 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1381 zend_string_release_ex(contents, 0);
1382 zend_value_error("Input string is too long");
1383 RETURN_THROWS();
1384 }
1385
1386 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1387
1388 php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1389
1390 zend_string_release_ex(contents, 0);
1391 }
1392 }
1393
PHP_METHOD(tidy,parseFile)1394 PHP_METHOD(tidy, parseFile)
1395 {
1396 char *enc = NULL;
1397 size_t enc_len = 0;
1398 bool use_include_path = 0;
1399 HashTable *options_ht = NULL;
1400 zend_string *inputfile, *contents, *options_str = NULL;
1401 PHPTidyObj *obj;
1402
1403 ZEND_PARSE_PARAMETERS_START(1, 4)
1404 Z_PARAM_PATH_STR(inputfile)
1405 Z_PARAM_OPTIONAL
1406 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1407 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1408 Z_PARAM_BOOL(use_include_path)
1409 ZEND_PARSE_PARAMETERS_END();
1410
1411 TIDY_SET_CONTEXT;
1412 obj = Z_TIDY_P(object);
1413
1414 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1415 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1416 RETURN_FALSE;
1417 }
1418
1419 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1420 zend_string_release_ex(contents, 0);
1421 zend_value_error("Input string is too long");
1422 RETURN_THROWS();
1423 }
1424
1425 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1426
1427 if (php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1428 RETVAL_FALSE;
1429 } else {
1430 RETVAL_TRUE;
1431 }
1432
1433 zend_string_release_ex(contents, 0);
1434 }
1435
PHP_METHOD(tidy,parseString)1436 PHP_METHOD(tidy, parseString)
1437 {
1438 char *enc = NULL;
1439 size_t enc_len = 0;
1440 HashTable *options_ht = NULL;
1441 PHPTidyObj *obj;
1442 zend_string *input, *options_str = NULL;
1443
1444 ZEND_PARSE_PARAMETERS_START(1, 3)
1445 Z_PARAM_STR(input)
1446 Z_PARAM_OPTIONAL
1447 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1448 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1449 ZEND_PARSE_PARAMETERS_END();
1450
1451 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1452 zend_argument_value_error(1, "is too long");
1453 RETURN_THROWS();
1454 }
1455
1456 TIDY_SET_CONTEXT;
1457 obj = Z_TIDY_P(object);
1458
1459 TIDY_APPLY_CONFIG(obj->ptdoc->doc, options_str, options_ht);
1460
1461 if(php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1462 RETURN_TRUE;
1463 }
1464
1465 RETURN_FALSE;
1466 }
1467
1468
1469 /* {{{ Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1470 PHP_FUNCTION(tidy_get_root)
1471 {
1472 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1473 }
1474 /* }}} */
1475
1476 /* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1477 PHP_FUNCTION(tidy_get_html)
1478 {
1479 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1480 }
1481 /* }}} */
1482
1483 /* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1484 PHP_FUNCTION(tidy_get_head)
1485 {
1486 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1487 }
1488 /* }}} */
1489
1490 /* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1491 PHP_FUNCTION(tidy_get_body)
1492 {
1493 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1494 }
1495 /* }}} */
1496
1497 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1498 PHP_METHOD(tidyNode, hasChildren)
1499 {
1500 TIDY_FETCH_ONLY_OBJECT;
1501
1502 if (tidyGetChild(obj->node)) {
1503 RETURN_TRUE;
1504 } else {
1505 RETURN_FALSE;
1506 }
1507 }
1508 /* }}} */
1509
1510 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1511 PHP_METHOD(tidyNode, hasSiblings)
1512 {
1513 TIDY_FETCH_ONLY_OBJECT;
1514
1515 if (obj->node && tidyGetNext(obj->node)) {
1516 RETURN_TRUE;
1517 } else {
1518 RETURN_FALSE;
1519 }
1520 }
1521 /* }}} */
1522
1523 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1524 PHP_METHOD(tidyNode, isComment)
1525 {
1526 TIDY_FETCH_ONLY_OBJECT;
1527
1528 if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1529 RETURN_TRUE;
1530 } else {
1531 RETURN_FALSE;
1532 }
1533 }
1534 /* }}} */
1535
1536 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1537 PHP_METHOD(tidyNode, isHtml)
1538 {
1539 TIDY_FETCH_ONLY_OBJECT;
1540
1541 switch (tidyNodeGetType(obj->node)) {
1542 case TidyNode_Start:
1543 case TidyNode_End:
1544 case TidyNode_StartEnd:
1545 RETURN_TRUE;
1546 default:
1547 RETURN_FALSE;
1548 }
1549 }
1550 /* }}} */
1551
1552 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1553 PHP_METHOD(tidyNode, isText)
1554 {
1555 TIDY_FETCH_ONLY_OBJECT;
1556
1557 if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1558 RETURN_TRUE;
1559 } else {
1560 RETURN_FALSE;
1561 }
1562 }
1563 /* }}} */
1564
1565 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1566 PHP_METHOD(tidyNode, isJste)
1567 {
1568 TIDY_FETCH_ONLY_OBJECT;
1569
1570 if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1571 RETURN_TRUE;
1572 } else {
1573 RETURN_FALSE;
1574 }
1575 }
1576 /* }}} */
1577
1578 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1579 PHP_METHOD(tidyNode, isAsp)
1580 {
1581 TIDY_FETCH_ONLY_OBJECT;
1582
1583 if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1584 RETURN_TRUE;
1585 } else {
1586 RETURN_FALSE;
1587 }
1588 }
1589 /* }}} */
1590
1591 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1592 PHP_METHOD(tidyNode, isPhp)
1593 {
1594 TIDY_FETCH_ONLY_OBJECT;
1595
1596 if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1597 RETURN_TRUE;
1598 } else {
1599 RETURN_FALSE;
1600 }
1601 }
1602 /* }}} */
1603
1604 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1605 PHP_METHOD(tidyNode, getParent)
1606 {
1607 TidyNode parent_node;
1608 PHPTidyObj *newobj;
1609 TIDY_FETCH_ONLY_OBJECT;
1610
1611 parent_node = tidyGetParent(obj->node);
1612 if(parent_node) {
1613 tidy_instantiate(tidy_ce_node, return_value);
1614 newobj = Z_TIDY_P(return_value);
1615 newobj->node = parent_node;
1616 newobj->type = is_node;
1617 newobj->ptdoc = obj->ptdoc;
1618 newobj->ptdoc->ref_count++;
1619 tidy_add_node_default_properties(newobj);
1620 } else {
1621 ZVAL_NULL(return_value);
1622 }
1623 }
1624 /* }}} */
1625
1626
1627 /* {{{ __constructor for tidyNode. */
PHP_METHOD(tidyNode,__construct)1628 PHP_METHOD(tidyNode, __construct)
1629 {
1630 zend_throw_error(NULL, "You should not create a tidyNode manually");
1631 }
1632 /* }}} */
1633
1634 #endif
1635