1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: John Coggeshall <john@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include "php.h"
22 #include "php_tidy.h"
23
24 #ifdef HAVE_TIDY
25
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40
41 #include "tidy_arginfo.h"
42
43 /* compatibility with older versions of libtidy */
44 #ifndef TIDY_CALL
45 #define TIDY_CALL
46 #endif
47
48 /* {{{ ext/tidy macros */
/*
 * Overwrite the last byte of a non-empty TidyBuffer with a NUL terminator.
 * Callers in this file then treat bp as a string of length size-1.
 */
#define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)

/* Declare `object` and initialise it from getThis(). */
#define TIDY_SET_CONTEXT \
	zval *object = getThis();

/*
 * Declare `obj` and `object`, parse one tidy document object ("O" with
 * tidy_ce_doc) through zend_parse_method_parameters() and resolve it to its
 * PHPTidyObj. Returns from the enclosing function when parsing fails.
 */
#define TIDY_FETCH_OBJECT	\
	PHPTidyObj *obj;	\
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) == FAILURE) {	\
		RETURN_THROWS();	\
	}	\
	obj = Z_TIDY_P(object);	\

/*
 * TIDY_FETCH_OBJECT plus a check of ptdoc->initialized: throws an Error and
 * returns from the enclosing function when the flag is not set.
 */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		return; \
	}

/*
 * Variant for calls that take no parameters: `object` comes from getThis()
 * (via TIDY_SET_CONTEXT) and any passed argument is rejected by
 * zend_parse_parameters_none().
 */
#define TIDY_FETCH_ONLY_OBJECT	\
	PHPTidyObj *obj;	\
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() == FAILURE) {	\
		RETURN_THROWS();	\
	}	\
	obj = Z_TIDY_P(object);	\

/* Load the file named by the tidy.default_config global into _doc, if that setting is a non-empty string. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		php_tidy_load_config(_doc, TG(default_config)); \
	}
81 /* }}} */
82
83 /* {{{ ext/tidy structs */
typedef struct _PHPTidyDoc PHPTidyDoc;
typedef struct _PHPTidyObj PHPTidyObj;

/* Kind of wrapper object being created (see tidy_object_new() and PHPTidyObj.type). */
typedef enum {
	is_node,
	is_doc
} tidy_obj_type;

/* Selects which well-known node php_tidy_create_node() looks up in the document. */
typedef enum {
	is_root_node,
	is_html_node,
	is_head_node,
	is_body_node
} tidy_base_nodetypes;

/*
 * A libtidy document plus its error buffer. One instance can be referenced by
 * several PHPTidyObj wrappers through PHPTidyObj.ptdoc.
 */
struct _PHPTidyDoc {
	TidyDoc			doc;
	TidyBuffer		*errbuf;		/* emalloc'ed buffer registered with tidySetErrorBuffer() */
	unsigned int	ref_count;		/* number of PHPTidyObj wrappers pointing at this document */
	unsigned int    initialized:1;	/* set by php_tidy_parse_string() just before the input is parsed */
};

/*
 * Per-object storage for the tidy and tidyNode classes. The zend_object is
 * embedded as the last member; php_tidy_fetch_object() maps it back to the
 * enclosing struct using XtOffsetOf(PHPTidyObj, std).
 */
struct _PHPTidyObj {
	TidyNode		node;	/* node wrapped by a tidyNode object */
	tidy_obj_type	type;
	PHPTidyDoc		*ptdoc;	/* document this object belongs to */
	zend_object		std;
};
112
php_tidy_fetch_object(zend_object * obj)113 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
114 return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
115 }
116
117 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
118 /* }}} */
119
120 /* {{{ ext/tidy prototypes */
/* Helpers, object handlers and output-handler plumbing defined later in this file. */
static zend_string *php_tidy_file_to_mem(char *, bool);
static void tidy_object_free_storage(zend_object *);
static zend_object *tidy_object_new_node(zend_class_entry *);
static zend_object *tidy_object_new_doc(zend_class_entry *);
static zval *tidy_instantiate(zend_class_entry *, zval *);
static zend_result tidy_doc_cast_handler(zend_object *, zval *, int);
static zend_result tidy_node_cast_handler(zend_object *, zval *, int);
static void tidy_doc_update_properties(PHPTidyObj *);
static void tidy_add_node_default_properties(PHPTidyObj *);
static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
static int _php_tidy_set_tidy_opt(TidyDoc, char *, zval *);
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options);
static PHP_INI_MH(php_tidy_set_clean_output);
static void php_tidy_clean_output_start(const char *name, size_t name_len);
static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
static int php_tidy_output_handler(void **nothing, php_output_context *output_context);

/* Module lifecycle callbacks referenced from tidy_module_entry. */
static PHP_MINIT_FUNCTION(tidy);
static PHP_MSHUTDOWN_FUNCTION(tidy);
static PHP_RINIT_FUNCTION(tidy);
static PHP_RSHUTDOWN_FUNCTION(tidy);
static PHP_MINFO_FUNCTION(tidy);
144
/* Storage for the extension's globals (accessed through TG() elsewhere in this file). */
ZEND_DECLARE_MODULE_GLOBALS(tidy)

/*
 * INI settings:
 *  - tidy.default_config: string, PHP_INI_SYSTEM, stored in the default_config
 *    global; read by TIDY_SET_DEFAULT_CONFIG.
 *  - tidy.clean_output: boolean, PHP_INI_USER, stored in the clean_output
 *    global; changes go through php_tidy_set_clean_output().
 */
PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("tidy.default_config", "", PHP_INI_SYSTEM, OnUpdateString, default_config, zend_tidy_globals, tidy_globals)
STD_PHP_INI_BOOLEAN("tidy.clean_output", "0", PHP_INI_USER, php_tidy_set_clean_output, clean_output, zend_tidy_globals, tidy_globals)
PHP_INI_END()

/* Class entries for the document and node classes; assigned in PHP_MINIT_FUNCTION(tidy). */
static zend_class_entry *tidy_ce_doc, *tidy_ce_node;

/* Object handler tables for the two classes; filled in during PHP_MINIT_FUNCTION(tidy). */
static zend_object_handlers tidy_object_handlers_doc;
static zend_object_handlers tidy_object_handlers_node;
156
/*
 * Module descriptor: registers the extension under the name "tidy" with its
 * function table, the five lifecycle callbacks, its version and its globals.
 */
zend_module_entry tidy_module_entry = {
	STANDARD_MODULE_HEADER,
	"tidy",
	ext_functions,
	PHP_MINIT(tidy),
	PHP_MSHUTDOWN(tidy),
	PHP_RINIT(tidy),
	PHP_RSHUTDOWN(tidy),
	PHP_MINFO(tidy),
	PHP_TIDY_VERSION,
	PHP_MODULE_GLOBALS(tidy),
	NULL,
	NULL,
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};

/* Only when COMPILE_DL_TIDY is defined: emit the get_module entry point (and, under ZTS, the TSRMLS cache definition). */
#ifdef COMPILE_DL_TIDY
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(tidy)
#endif
180
181 static void* TIDY_CALL php_tidy_malloc(size_t len)
182 {
183 return emalloc(len);
184 }
185
php_tidy_realloc(void * buf,size_t len)186 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
187 {
188 return erealloc(buf, len);
189 }
190
php_tidy_free(void * buf)191 static void TIDY_CALL php_tidy_free(void *buf)
192 {
193 efree(buf);
194 }
195
php_tidy_panic(ctmbstr msg)196 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
197 {
198 php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (char *)msg);
199 }
200
/*
 * Load the configuration file at `path` into `doc` with tidyLoadConfig().
 * A negative result raises an E_WARNING, a positive result an E_NOTICE;
 * zero is silent. Nothing is returned to the caller.
 */
static void php_tidy_load_config(TidyDoc doc, const char *path)
{
	int status = tidyLoadConfig(doc, path);

	if (status == 0) {
		return;
	}

	if (status < 0) {
		php_error_docref(NULL, E_WARNING, "Could not load the Tidy configuration file \"%s\"", path);
	} else {
		php_error_docref(NULL, E_NOTICE, "There were errors while parsing the Tidy configuration file \"%s\"", path);
	}
}
210
php_tidy_apply_config(TidyDoc doc,zend_string * str_string,HashTable * ht_options)211 static zend_result php_tidy_apply_config(TidyDoc doc, zend_string *str_string, HashTable *ht_options)
212 {
213 if (ht_options) {
214 return _php_tidy_apply_config_array(doc, ht_options);
215 } else if (str_string) {
216 if (php_check_open_basedir(ZSTR_VAL(str_string))) {
217 return FAILURE;
218 }
219 php_tidy_load_config(doc, ZSTR_VAL(str_string));
220 }
221 return SUCCESS;
222 }
223
/*
 * Set the libtidy option called `optname` on `doc` from a PHP value.
 *
 * The value is converted according to the option's type: string options use
 * the value's string form, integer and boolean options use its integer form.
 * Returns SUCCESS when libtidy accepts the value. Returns FAILURE when the
 * option is unknown or read-only (with an E_WARNING), when libtidy rejects
 * the value, or when the option type is not one of the three handled here
 * (with an E_WARNING).
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, char *optname, zval *value)
{
	TidyOption opt = tidyGetOptionByName(doc, optname);
	int result = FAILURE;

	if (!opt) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(opt)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(opt)) {
		case TidyString: {
			zend_string *tmp_str;
			zend_string *str = zval_get_tmp_string(value, &tmp_str);

			if (tidyOptSetValue(doc, tidyOptGetId(opt), ZSTR_VAL(str))) {
				result = SUCCESS;
			}
			/* The temporary string is released on both outcomes. */
			zend_tmp_string_release(tmp_str);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(opt), number)) {
				result = SUCCESS;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(opt), flag)) {
				result = SUCCESS;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return result;
}
271
/*
 * Shared implementation of tidy_repair_string() and tidy_repair_file().
 *
 * Parses the PHP arguments (markup or file name, optional configuration
 * array / configuration file name, optional encoding and, for files only,
 * use_include_path), then parses and repairs the input on a temporary
 * TidyDoc. return_value becomes the repaired markup, or false when parsing
 * or repairing fails.
 *
 * is_file: when true, argument 1 is a path whose contents are loaded with
 *          php_tidy_file_to_mem(); that string is owned and released here.
 *          When false, argument 1 is the markup itself and is only borrowed.
 *
 * Throws a ValueError when the input does not fit the 32-bit length that is
 * handed to tidyBufAttach().
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	TidyBuffer buf;
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;

	if (is_file) {
		bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		data = arg1;
	}

	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (is_file) {
			zend_string_release_ex(data, false);
			/*
			 * The over-long value is the file's contents, not argument 1, so
			 * raise a plain ValueError with a complete sentence (same message
			 * as tidy_parse_file()) instead of an argument-prefixed one.
			 */
			zend_value_error("Input string is too long");
		} else {
			zend_argument_value_error(1, "is too long");
		}
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	/*
	 * NOTE(review): a configuration or encoding failure only pre-sets the
	 * return value to false; the input is still parsed below and a successful
	 * repair overwrites that false. Kept as-is to preserve existing behaviour.
	 */
	if (php_tidy_apply_config(doc, config_str, config_ht) != SUCCESS) {
		RETVAL_FALSE;
	} else if (enc_len) {
		if (tidySetCharEncoding(doc, enc) < 0) {
			php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
			RETVAL_FALSE;
		}
	}

	/* data is always non-NULL here: both argument branches above guarantee it. */
	tidyBufInit(&buf);
	tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

	if (tidyParseBuffer(doc, &buf) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
		RETVAL_FALSE;
	} else if (tidyCleanAndRepair(doc) >= 0) {
		TidyBuffer output;
		tidyBufInit(&output);

		tidySaveBuffer (doc, &output);
		FIX_BUFFER(&output);
		RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
		tidyBufFree(&output);
	} else {
		RETVAL_FALSE;
	}

	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
373
php_tidy_file_to_mem(char * filename,bool use_include_path)374 static zend_string *php_tidy_file_to_mem(char *filename, bool use_include_path)
375 {
376 php_stream *stream;
377 zend_string *data = NULL;
378
379 if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
380 return NULL;
381 }
382 if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
383 data = ZSTR_EMPTY_ALLOC();
384 }
385 php_stream_close(stream);
386
387 return data;
388 }
389
/*
 * free_obj handler for both classes: destroys the standard object part, then
 * drops this object's reference on the shared PHPTidyDoc. When the count
 * reaches zero the error buffer, the TidyDoc and the PHPTidyDoc are released.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&intern->std);

	shared = intern->ptdoc;
	if (!shared) {
		return;
	}

	shared->ref_count--;

	/* ref_count is unsigned, so "no references left" is exactly zero. */
	if (shared->ref_count != 0) {
		return;
	}

	tidyBufFree(shared->errbuf);
	efree(shared->errbuf);
	tidyRelease(shared->doc);
	efree(shared);
}
407
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)408 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
409 {
410 PHPTidyObj *intern;
411
412 intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
413 zend_object_std_init(&intern->std, class_type);
414 object_properties_init(&intern->std, class_type);
415
416 switch(objtype) {
417 case is_node:
418 break;
419
420 case is_doc:
421 intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
422 intern->ptdoc->doc = tidyCreate();
423 intern->ptdoc->ref_count = 1;
424 intern->ptdoc->initialized = 0;
425 intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
426 tidyBufInit(intern->ptdoc->errbuf);
427
428 if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
429 tidyBufFree(intern->ptdoc->errbuf);
430 efree(intern->ptdoc->errbuf);
431 tidyRelease(intern->ptdoc->doc);
432 efree(intern->ptdoc);
433 efree(intern);
434 php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
435 }
436
437 tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
438 tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
439
440 TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
441 break;
442 }
443
444 intern->std.handlers = handlers;
445
446 return &intern->std;
447 }
448
tidy_object_new_node(zend_class_entry * class_type)449 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
450 {
451 return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
452 }
453
tidy_object_new_doc(zend_class_entry * class_type)454 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
455 {
456 return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
457 }
458
/* Initialise `object` as a new instance of `pce` and hand the same zval back to the caller. */
static zval *tidy_instantiate(zend_class_entry *pce, zval *object)
{
	object_init_ex(object, pce);

	return object;
}
464
tidy_doc_cast_handler(zend_object * in,zval * out,int type)465 static zend_result tidy_doc_cast_handler(zend_object *in, zval *out, int type)
466 {
467 TidyBuffer output;
468 PHPTidyObj *obj;
469
470 switch (type) {
471 case IS_LONG:
472 case _IS_NUMBER:
473 ZVAL_LONG(out, 0);
474 break;
475
476 case IS_DOUBLE:
477 ZVAL_DOUBLE(out, 0);
478 break;
479
480 case _IS_BOOL:
481 ZVAL_TRUE(out);
482 break;
483
484 case IS_STRING:
485 obj = php_tidy_fetch_object(in);
486 tidyBufInit(&output);
487 tidySaveBuffer (obj->ptdoc->doc, &output);
488 if (output.size) {
489 ZVAL_STRINGL(out, (char *) output.bp, output.size-1);
490 } else {
491 ZVAL_EMPTY_STRING(out);
492 }
493 tidyBufFree(&output);
494 break;
495
496 default:
497 return FAILURE;
498 }
499
500 return SUCCESS;
501 }
502
tidy_node_cast_handler(zend_object * in,zval * out,int type)503 static zend_result tidy_node_cast_handler(zend_object *in, zval *out, int type)
504 {
505 TidyBuffer buf;
506 PHPTidyObj *obj;
507
508 switch(type) {
509 case IS_LONG:
510 case _IS_NUMBER:
511 ZVAL_LONG(out, 0);
512 break;
513
514 case IS_DOUBLE:
515 ZVAL_DOUBLE(out, 0);
516 break;
517
518 case _IS_BOOL:
519 ZVAL_TRUE(out);
520 break;
521
522 case IS_STRING:
523 obj = php_tidy_fetch_object(in);
524 tidyBufInit(&buf);
525 if (obj->ptdoc) {
526 tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
527 ZVAL_STRINGL(out, (char *) buf.bp, buf.size-1);
528 } else {
529 ZVAL_EMPTY_STRING(out);
530 }
531 tidyBufFree(&buf);
532 break;
533
534 default:
535 return FAILURE;
536 }
537
538 return SUCCESS;
539 }
540
/*
 * Refresh the "value" and "errorBuffer" properties of a document object.
 *
 * "value" receives the output of tidySaveBuffer() and "errorBuffer" the
 * contents of the document's error buffer, each without the final byte of
 * its buffer. A property is left untouched when its buffer is empty.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer rendered;
	TidyBuffer *errors;

	tidyBufInit(&rendered);
	tidySaveBuffer (obj->ptdoc->doc, &rendered);

	if (rendered.size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, ZEND_STRL("value"), (char*) rendered.bp, rendered.size-1);
	}

	tidyBufFree(&rendered);

	errors = obj->ptdoc->errbuf;
	if (errors->size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, ZEND_STRL("errorBuffer"), (char*) errors->bp, errors->size-1);
	}
}
572
/*
 * Populate the properties of a node object from obj->node:
 *
 *   value, name, type, line, column, proprietary - read straight from libtidy
 *   id        - NULL for root, doctype, text and comment nodes, otherwise
 *               tidyNodeGetId()
 *   attribute - array name => value (empty string for a missing value), or
 *               NULL when the node has no attributes
 *   child     - array of node objects for each child, or NULL when there are
 *               no children
 *
 * Child objects share obj->ptdoc (its ref_count is bumped once per child) and
 * are filled in by calling this function recursively.
 */
static void tidy_add_node_default_properties(PHPTidyObj *obj)
{
	TidyBuffer text;
	TidyAttr attr;
	TidyNode child;
	zval attribute, children, child_zv;
	PHPTidyObj *child_obj;
	char *node_name;

	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);

	/* An empty buffer becomes the empty string; otherwise the final byte is dropped. */
	if (text.size) {
		zend_update_property_stringl(tidy_ce_node, &obj->std, ZEND_STRL("value"), (char *) text.bp, text.size - 1);
	} else {
		zend_update_property_stringl(tidy_ce_node, &obj->std, ZEND_STRL("value"), "", 0);
	}

	tidyBufFree(&text);

	node_name = (char *) tidyNodeGetName(obj->node);
	if (!node_name) {
		node_name = "";
	}
	zend_update_property_string(tidy_ce_node, &obj->std, ZEND_STRL("name"), node_name);

	zend_update_property_long(tidy_ce_node, &obj->std, ZEND_STRL("type"), tidyNodeGetType(obj->node));
	zend_update_property_long(tidy_ce_node, &obj->std, ZEND_STRL("line"), tidyNodeLine(obj->node));
	zend_update_property_long(tidy_ce_node, &obj->std, ZEND_STRL("column"), tidyNodeColumn(obj->node));
	zend_update_property_bool(tidy_ce_node, &obj->std, ZEND_STRL("proprietary"), tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			zend_update_property_null(tidy_ce_node, &obj->std, ZEND_STRL("id"));
			break;

		default:
			zend_update_property_long(tidy_ce_node, &obj->std, ZEND_STRL("id"), tidyNodeGetId(obj->node));
	}

	/* attribute: NULL without attributes, otherwise a name => value array. */
	attr = tidyAttrFirst(obj->node);

	if (!attr) {
		ZVAL_NULL(&attribute);
	} else {
		array_init(&attribute);

		for (; attr; attr = tidyAttrNext(attr)) {
			char *attr_name = (char *)tidyAttrName(attr);
			char *attr_value = (char *)tidyAttrValue(attr);

			/* Attributes without a name are skipped. */
			if (!attr_name) {
				continue;
			}

			if (attr_value) {
				add_assoc_string(&attribute, attr_name, attr_value);
			} else {
				add_assoc_str(&attribute, attr_name, zend_empty_string);
			}
		}
	}

	zend_update_property(tidy_ce_node, &obj->std, ZEND_STRL("attribute"), &attribute);

	zval_ptr_dtor(&attribute);

	/* child: NULL without children, otherwise a list of fully populated node objects. */
	child = tidyGetChild(obj->node);

	if (!child) {
		ZVAL_NULL(&children);
	} else {
		array_init(&children);

		for (; child; child = tidyGetNext(child)) {
			tidy_instantiate(tidy_ce_node, &child_zv);
			child_obj = Z_TIDY_P(&child_zv);
			child_obj->node = child;
			child_obj->type = is_node;
			child_obj->ptdoc = obj->ptdoc;
			child_obj->ptdoc->ref_count++;

			tidy_add_node_default_properties(child_obj);
			add_next_index_zval(&children, &child_zv);
		}
	}

	zend_update_property(tidy_ce_node, &obj->std, ZEND_STRL("child"), &children);

	zval_ptr_dtor(&children);
}
723
/*
 * Fetch the current value of `opt` from ptdoc->doc and report its type
 * through *type.
 *
 * The value is returned packed into a void pointer:
 *   TidyString  - a zend_string (a new copy of the value, or the empty string
 *                 when libtidy reports no value)
 *   TidyInteger - the integer value cast to a pointer
 *   TidyBoolean - the boolean value cast to a pointer
 * Any other option type yields NULL.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	*type = tidyOptGetType(opt);

	if (*type == TidyString) {
		char *val = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));

		if (!val) {
			return (void *) ZSTR_EMPTY_ALLOC();
		}
		return (void *) zend_string_init(val, strlen(val), 0);
	}

	if (*type == TidyInteger) {
		return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
	}

	if (*type == TidyBoolean) {
		return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
751
/*
 * Shared implementation for the functions that return one of the well-known
 * nodes of a document (root, html, head or body, chosen by node_type).
 *
 * Fetches the document object from the call's arguments, looks the node up
 * in libtidy and returns NULL when it does not exist. Otherwise return_value
 * becomes a new node object that shares the document (ref_count is bumped)
 * and has its properties populated.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	PHPTidyObj *node_obj;
	TidyNode found;
	TidyDoc doc;
	TIDY_FETCH_OBJECT;

	doc = obj->ptdoc->doc;

	switch (node_type) {
		case is_root_node:
			found = tidyGetRoot(doc);
			break;

		case is_html_node:
			found = tidyGetHtml(doc);
			break;

		case is_head_node:
			found = tidyGetHead(doc);
			break;

		case is_body_node:
			found = tidyGetBody(doc);
			break;

		EMPTY_SWITCH_DEFAULT_CASE()
	}

	if (!found) {
		RETURN_NULL();
	}

	tidy_instantiate(tidy_ce_node, return_value);
	node_obj = Z_TIDY_P(return_value);
	node_obj->node = found;
	node_obj->type = is_node;
	node_obj->ptdoc = obj->ptdoc;
	node_obj->ptdoc->ref_count++;

	tidy_add_node_default_properties(node_obj);
}
791
/*
 * Apply every string-keyed entry of ht_options to `doc` as a tidy option.
 *
 * Packed arrays are ignored altogether and entries without a string key are
 * skipped. The result of setting each individual option is not checked, so
 * this always returns SUCCESS.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, HashTable *ht_options)
{
	zval *opt_val;
	zend_string *opt_name;

	if (HT_IS_PACKED(ht_options)) {
		return SUCCESS;
	}

	ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(ht_options, opt_name, opt_val) {
		if (opt_name != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(opt_name), opt_val);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
807
/*
 * Parse `len` bytes at `string` into the document owned by `obj`.
 *
 * When `enc` is non-NULL it is applied first; an encoding rejected by libtidy
 * raises an E_WARNING and returns FAILURE before anything else happens. The
 * document is then flagged as initialized and the input is parsed. A parse
 * error raises an E_WARNING carrying the error buffer and returns FAILURE.
 * On success the object's properties are refreshed and SUCCESS is returned.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, char *string, uint32_t len, char *enc)
{
	TidyBuffer input;

	if (enc && tidySetCharEncoding(obj->ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	/* The flag is raised before parsing, so it stays set even if parsing fails. */
	obj->ptdoc->initialized = 1;

	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(obj->ptdoc->doc, &input) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", obj->ptdoc->errbuf->bp);
		return FAILURE;
	}

	tidy_doc_update_properties(obj);
	return SUCCESS;
}
831
PHP_MINIT_FUNCTION(tidy)832 static PHP_MINIT_FUNCTION(tidy)
833 {
834 tidySetMallocCall(php_tidy_malloc);
835 tidySetReallocCall(php_tidy_realloc);
836 tidySetFreeCall(php_tidy_free);
837 tidySetPanicCall(php_tidy_panic);
838
839 REGISTER_INI_ENTRIES();
840
841 tidy_ce_doc = register_class_tidy();
842 tidy_ce_doc->create_object = tidy_object_new_doc;
843 memcpy(&tidy_object_handlers_doc, &std_object_handlers, sizeof(zend_object_handlers));
844 tidy_object_handlers_doc.clone_obj = NULL;
845
846 tidy_ce_node = register_class_tidyNode();
847 tidy_ce_node->create_object = tidy_object_new_node;
848 memcpy(&tidy_object_handlers_node, &std_object_handlers, sizeof(zend_object_handlers));
849 tidy_object_handlers_node.clone_obj = NULL;
850
851 tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
852 tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
853
854 tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
855 tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
856
857 register_tidy_symbols(module_number);
858
859 php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
860
861 return SUCCESS;
862 }
863
PHP_RINIT_FUNCTION(tidy)864 static PHP_RINIT_FUNCTION(tidy)
865 {
866 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
867 ZEND_TSRMLS_CACHE_UPDATE();
868 #endif
869
870 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
871
872 return SUCCESS;
873 }
874
PHP_RSHUTDOWN_FUNCTION(tidy)875 static PHP_RSHUTDOWN_FUNCTION(tidy)
876 {
877 TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
878
879 return SUCCESS;
880 }
881
PHP_MSHUTDOWN_FUNCTION(tidy)882 static PHP_MSHUTDOWN_FUNCTION(tidy)
883 {
884 UNREGISTER_INI_ENTRIES();
885 return SUCCESS;
886 }
887
/*
 * phpinfo() section: prints a "Tidy support" row, the library version (which
 * call is used depends on the header the build detected), the release date
 * when available, and finally the extension's INI entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();
	php_info_print_table_row(2, "Tidy support", "enabled");
#if defined(HAVE_TIDYBUFFIO_H)
	php_info_print_table_row(2, "libTidy Version", (char *)tidyLibraryVersion());
#elif defined(HAVE_TIDYP_H)
	php_info_print_table_row(2, "libtidyp Version", (char *)tidyVersion());
#endif
#if defined(HAVE_TIDYRELEASEDATE)
	php_info_print_table_row(2, "libTidy Release", (char *)tidyReleaseDate());
#endif
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
904
PHP_INI_MH(php_tidy_set_clean_output)905 static PHP_INI_MH(php_tidy_set_clean_output)
906 {
907 int status;
908 bool value;
909
910 value = zend_ini_parse_bool(new_value);
911
912 if (stage == PHP_INI_STAGE_RUNTIME) {
913 status = php_output_get_status();
914
915 if (value && (status & PHP_OUTPUT_WRITTEN)) {
916 php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
917 return FAILURE;
918 }
919 if (status & PHP_OUTPUT_SENT) {
920 php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
921 return FAILURE;
922 }
923 }
924
925 status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
926
927 if (stage == PHP_INI_STAGE_RUNTIME && value) {
928 if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
929 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
930 }
931 }
932
933 return status;
934 }
935
936 /*
937 * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
938 */
939
/*
 * Start the tidy output handler under the given name, but only when the
 * clean_output global is enabled and the handler could be created. The
 * handler is created with chunk size 0 and PHP_OUTPUT_HANDLER_STDFLAGS.
 */
static void php_tidy_clean_output_start(const char *name, size_t name_len)
{
	php_output_handler *handler;

	if (!TG(clean_output)) {
		return;
	}

	handler = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
	if (handler) {
		php_output_handler_start(handler);
	}
}
948
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)949 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
950 {
951 if (chunk_size) {
952 php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
953 return NULL;
954 }
955 if (!TG(clean_output)) {
956 TG(clean_output) = 1;
957 }
958 return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
959 }
960
/*
 * Output handler behind "ob_tidyhandler".
 *
 * It only acts when clean_output is enabled and the whole output arrives in
 * a single pass (both PHP_OUTPUT_HANDLER_START and PHP_OUTPUT_HANDLER_FINAL
 * are set). The buffered output is then parsed and repaired on a temporary
 * TidyDoc, and the repaired markup is handed to the output layer as the new
 * buffer (out.free = 1).
 *
 * `nothing` is unused. Returns SUCCESS when repaired output was produced,
 * FAILURE otherwise (including input longer than the 32-bit length accepted
 * by tidyBufAttach(), which also raises an E_WARNING).
 */
static int php_tidy_output_handler(void **nothing, php_output_context *output_context)
{
	int status = FAILURE;
	TidyDoc doc;
	TidyBuffer inbuf, outbuf, errbuf;

	if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
		doc = tidyCreate();
		tidyBufInit(&errbuf);

		if (0 == tidySetErrorBuffer(doc, &errbuf)) {
			tidyOptSetBool(doc, TidyForceOutput, yes);
			tidyOptSetBool(doc, TidyMark, no);

			if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
				/*
				 * Do not return from here: fall through to the common cleanup
				 * below so that doc and errbuf are released.
				 */
				php_error_docref(NULL, E_WARNING, "Input string is too long");
			} else {
				TIDY_SET_DEFAULT_CONFIG(doc);

				tidyBufInit(&inbuf);
				tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);

				if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
					tidyBufInit(&outbuf);
					tidySaveBuffer(doc, &outbuf);
					FIX_BUFFER(&outbuf);
					/* outbuf's storage is handed over to the output layer, so it is not freed here. */
					output_context->out.data = (char *) outbuf.bp;
					output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
					output_context->out.free = 1;
					status = SUCCESS;
				}
			}
		}

		tidyRelease(doc);
		tidyBufFree(&errbuf);
	}

	return status;
}
1002
1003 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1004 PHP_FUNCTION(tidy_parse_string)
1005 {
1006 char *enc = NULL;
1007 size_t enc_len = 0;
1008 zend_string *input, *options_str = NULL;
1009 HashTable *options_ht = NULL;
1010 PHPTidyObj *obj;
1011
1012 ZEND_PARSE_PARAMETERS_START(1, 3)
1013 Z_PARAM_STR(input)
1014 Z_PARAM_OPTIONAL
1015 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1016 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1017 ZEND_PARSE_PARAMETERS_END();
1018
1019 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1020 zend_argument_value_error(1, "is too long");
1021 RETURN_THROWS();
1022 }
1023
1024 tidy_instantiate(tidy_ce_doc, return_value);
1025 obj = Z_TIDY_P(return_value);
1026
1027 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1028 || php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == FAILURE) {
1029 zval_ptr_dtor(return_value);
1030 RETURN_FALSE;
1031 }
1032 }
1033 /* }}} */
1034
1035 /* {{{ Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1036 PHP_FUNCTION(tidy_get_error_buffer)
1037 {
1038 TIDY_FETCH_OBJECT;
1039
1040 if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1041 RETURN_STRINGL((char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1042 } else {
1043 RETURN_FALSE;
1044 }
1045 }
1046 /* }}} */
1047
1048 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1049 PHP_FUNCTION(tidy_get_output)
1050 {
1051 TidyBuffer output;
1052 TIDY_FETCH_OBJECT;
1053
1054 tidyBufInit(&output);
1055 tidySaveBuffer(obj->ptdoc->doc, &output);
1056 FIX_BUFFER(&output);
1057 RETVAL_STRINGL((char *) output.bp, output.size ? output.size-1 : 0);
1058 tidyBufFree(&output);
1059 }
1060 /* }}} */
1061
1062 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1063 PHP_FUNCTION(tidy_parse_file)
1064 {
1065 char *enc = NULL;
1066 size_t enc_len = 0;
1067 bool use_include_path = 0;
1068 zend_string *inputfile, *contents, *options_str = NULL;
1069 HashTable *options_ht = NULL;
1070
1071 PHPTidyObj *obj;
1072
1073 ZEND_PARSE_PARAMETERS_START(1, 4)
1074 Z_PARAM_PATH_STR(inputfile)
1075 Z_PARAM_OPTIONAL
1076 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1077 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1078 Z_PARAM_BOOL(use_include_path)
1079 ZEND_PARSE_PARAMETERS_END();
1080
1081 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1082 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1083 RETURN_FALSE;
1084 }
1085
1086 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1087 zend_string_release_ex(contents, 0);
1088 zend_value_error("Input string is too long");
1089 RETURN_THROWS();
1090 }
1091
1092 tidy_instantiate(tidy_ce_doc, return_value);
1093 obj = Z_TIDY_P(return_value);
1094
1095 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1096 || php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1097 zval_ptr_dtor(return_value);
1098 RETVAL_FALSE;
1099 }
1100
1101 zend_string_release_ex(contents, 0);
1102 }
1103 /* }}} */
1104
1105 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1106 PHP_FUNCTION(tidy_clean_repair)
1107 {
1108 TIDY_FETCH_OBJECT;
1109
1110 if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1111 tidy_doc_update_properties(obj);
1112 RETURN_TRUE;
1113 }
1114
1115 RETURN_FALSE;
1116 }
1117 /* }}} */
1118
1119 /* {{{ Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1120 PHP_FUNCTION(tidy_repair_string)
1121 {
1122 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1123 }
1124 /* }}} */
1125
1126 /* {{{ Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1127 PHP_FUNCTION(tidy_repair_file)
1128 {
1129 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1130 }
1131 /* }}} */
1132
1133 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1134 PHP_FUNCTION(tidy_diagnose)
1135 {
1136 TIDY_FETCH_OBJECT;
1137
1138 if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1139 tidy_doc_update_properties(obj);
1140 RETURN_TRUE;
1141 }
1142
1143 RETURN_FALSE;
1144 }
1145 /* }}} */
1146
1147 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1148 PHP_FUNCTION(tidy_get_release)
1149 {
1150 if (zend_parse_parameters_none() == FAILURE) {
1151 RETURN_THROWS();
1152 }
1153
1154 #ifdef HAVE_TIDYRELEASEDATE
1155 RETURN_STRING((char *)tidyReleaseDate());
1156 #else
1157 RETURN_STRING((char *)"unknown");
1158 #endif
1159 }
1160 /* }}} */
1161
1162
1163 #ifdef HAVE_TIDYOPTGETDOC
1164 /* {{{ Returns the documentation for the given option name */
PHP_FUNCTION(tidy_get_opt_doc)1165 PHP_FUNCTION(tidy_get_opt_doc)
1166 {
1167 PHPTidyObj *obj;
1168 char *optval, *optname;
1169 size_t optname_len;
1170 TidyOption opt;
1171 zval *object;
1172
1173 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1174 RETURN_THROWS();
1175 }
1176
1177 obj = Z_TIDY_P(object);
1178
1179 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1180
1181 if (!opt) {
1182 zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1183 RETURN_THROWS();
1184 }
1185
1186 if ( (optval = (char *) tidyOptGetDoc(obj->ptdoc->doc, opt)) ) {
1187 RETURN_STRING(optval);
1188 }
1189
1190 RETURN_FALSE;
1191 }
1192 /* }}} */
1193 #endif
1194
1195
1196 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1197 PHP_FUNCTION(tidy_get_config)
1198 {
1199 TidyIterator itOpt;
1200 char *opt_name;
1201 void *opt_value;
1202 TidyOptionType optt;
1203
1204 TIDY_FETCH_OBJECT;
1205
1206 itOpt = tidyGetOptionList(obj->ptdoc->doc);
1207
1208 array_init(return_value);
1209
1210 while (itOpt) {
1211 TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1212
1213 opt_name = (char *)tidyOptGetName(opt);
1214 opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1215 switch (optt) {
1216 case TidyString:
1217 add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1218 break;
1219
1220 case TidyInteger:
1221 add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1222 break;
1223
1224 case TidyBoolean:
1225 add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1226 break;
1227 }
1228 }
1229
1230 return;
1231 }
1232 /* }}} */
1233
1234 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1235 PHP_FUNCTION(tidy_get_status)
1236 {
1237 TIDY_FETCH_OBJECT;
1238
1239 RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1240 }
1241 /* }}} */
1242
1243 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1244 PHP_FUNCTION(tidy_get_html_ver)
1245 {
1246 TIDY_FETCH_INITIALIZED_OBJECT;
1247
1248 RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1249 }
1250 /* }}} */
1251
1252 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1253 PHP_FUNCTION(tidy_is_xhtml)
1254 {
1255 TIDY_FETCH_INITIALIZED_OBJECT;
1256
1257 RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1258 }
1259 /* }}} */
1260
1261 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1262 PHP_FUNCTION(tidy_is_xml)
1263 {
1264 TIDY_FETCH_INITIALIZED_OBJECT;
1265
1266 RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1267 }
1268 /* }}} */
1269
1270 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1271 PHP_FUNCTION(tidy_error_count)
1272 {
1273 TIDY_FETCH_OBJECT;
1274
1275 RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1276 }
1277 /* }}} */
1278
1279 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1280 PHP_FUNCTION(tidy_warning_count)
1281 {
1282 TIDY_FETCH_OBJECT;
1283
1284 RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1285 }
1286 /* }}} */
1287
1288 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1289 PHP_FUNCTION(tidy_access_count)
1290 {
1291 TIDY_FETCH_OBJECT;
1292
1293 RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1294 }
1295 /* }}} */
1296
1297 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1298 PHP_FUNCTION(tidy_config_count)
1299 {
1300 TIDY_FETCH_OBJECT;
1301
1302 RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1303 }
1304 /* }}} */
1305
1306 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1307 PHP_FUNCTION(tidy_getopt)
1308 {
1309 PHPTidyObj *obj;
1310 char *optname;
1311 void *optval;
1312 size_t optname_len;
1313 TidyOption opt;
1314 TidyOptionType optt;
1315 zval *object;
1316
1317 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) == FAILURE) {
1318 RETURN_THROWS();
1319 }
1320
1321 obj = Z_TIDY_P(object);
1322
1323 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1324
1325 if (!opt) {
1326 zend_argument_value_error(getThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1327 RETURN_THROWS();
1328 }
1329
1330 optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1331 switch (optt) {
1332 case TidyString:
1333 RETVAL_STR((zend_string*)optval);
1334 return;
1335
1336 case TidyInteger:
1337 RETURN_LONG((zend_long)optval);
1338 break;
1339
1340 case TidyBoolean:
1341 if (optval) {
1342 RETURN_TRUE;
1343 } else {
1344 RETURN_FALSE;
1345 }
1346 break;
1347
1348 default:
1349 php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1350 break;
1351 }
1352
1353 RETURN_FALSE;
1354 }
1355 /* }}} */
1356
PHP_METHOD(tidy,__construct)1357 PHP_METHOD(tidy, __construct)
1358 {
1359 char *enc = NULL;
1360 size_t enc_len = 0;
1361 bool use_include_path = 0;
1362 HashTable *options_ht = NULL;
1363 zend_string *contents, *inputfile = NULL, *options_str = NULL;
1364 PHPTidyObj *obj;
1365
1366 ZEND_PARSE_PARAMETERS_START(0, 4)
1367 Z_PARAM_OPTIONAL
1368 Z_PARAM_PATH_STR_OR_NULL(inputfile)
1369 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1370 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1371 Z_PARAM_BOOL(use_include_path)
1372 ZEND_PARSE_PARAMETERS_END();
1373
1374 TIDY_SET_CONTEXT;
1375 obj = Z_TIDY_P(object);
1376
1377 if (inputfile) {
1378 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1379 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1380 return;
1381 }
1382
1383 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1384 zend_string_release_ex(contents, 0);
1385 zend_value_error("Input string is too long");
1386 RETURN_THROWS();
1387 }
1388
1389 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS) {
1390 /* TODO: this is the constructor, we should throw probably... */
1391 zend_string_release_ex(contents, 0);
1392 RETURN_FALSE;
1393 }
1394
1395 php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1396
1397 zend_string_release_ex(contents, 0);
1398 }
1399 }
1400
PHP_METHOD(tidy,parseFile)1401 PHP_METHOD(tidy, parseFile)
1402 {
1403 char *enc = NULL;
1404 size_t enc_len = 0;
1405 bool use_include_path = 0;
1406 HashTable *options_ht = NULL;
1407 zend_string *inputfile, *contents, *options_str = NULL;
1408 PHPTidyObj *obj;
1409
1410 ZEND_PARSE_PARAMETERS_START(1, 4)
1411 Z_PARAM_PATH_STR(inputfile)
1412 Z_PARAM_OPTIONAL
1413 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1414 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1415 Z_PARAM_BOOL(use_include_path)
1416 ZEND_PARSE_PARAMETERS_END();
1417
1418 TIDY_SET_CONTEXT;
1419 obj = Z_TIDY_P(object);
1420
1421 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1422 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1423 RETURN_FALSE;
1424 }
1425
1426 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1427 zend_string_release_ex(contents, 0);
1428 zend_value_error("Input string is too long");
1429 RETURN_THROWS();
1430 }
1431
1432 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1433 || php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == FAILURE) {
1434 RETVAL_FALSE;
1435 } else {
1436 RETVAL_TRUE;
1437 }
1438
1439 zend_string_release_ex(contents, 0);
1440 }
1441
PHP_METHOD(tidy,parseString)1442 PHP_METHOD(tidy, parseString)
1443 {
1444 char *enc = NULL;
1445 size_t enc_len = 0;
1446 HashTable *options_ht = NULL;
1447 PHPTidyObj *obj;
1448 zend_string *input, *options_str = NULL;
1449
1450 ZEND_PARSE_PARAMETERS_START(1, 3)
1451 Z_PARAM_STR(input)
1452 Z_PARAM_OPTIONAL
1453 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1454 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1455 ZEND_PARSE_PARAMETERS_END();
1456
1457 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1458 zend_argument_value_error(1, "is too long");
1459 RETURN_THROWS();
1460 }
1461
1462 TIDY_SET_CONTEXT;
1463 obj = Z_TIDY_P(object);
1464
1465 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) == SUCCESS
1466 && php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS) {
1467 RETURN_TRUE;
1468 }
1469
1470 RETURN_FALSE;
1471 }
1472
1473
1474 /* {{{ Returns a TidyNode Object representing the root of the tidy parse tree */
PHP_FUNCTION(tidy_get_root)1475 PHP_FUNCTION(tidy_get_root)
1476 {
1477 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
1478 }
1479 /* }}} */
1480
1481 /* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_html)1482 PHP_FUNCTION(tidy_get_html)
1483 {
1484 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
1485 }
1486 /* }}} */
1487
1488 /* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_head)1489 PHP_FUNCTION(tidy_get_head)
1490 {
1491 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
1492 }
1493 /* }}} */
1494
1495 /* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
PHP_FUNCTION(tidy_get_body)1496 PHP_FUNCTION(tidy_get_body)
1497 {
1498 php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
1499 }
1500 /* }}} */
1501
1502 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1503 PHP_METHOD(tidyNode, hasChildren)
1504 {
1505 TIDY_FETCH_ONLY_OBJECT;
1506
1507 if (tidyGetChild(obj->node)) {
1508 RETURN_TRUE;
1509 } else {
1510 RETURN_FALSE;
1511 }
1512 }
1513 /* }}} */
1514
1515 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1516 PHP_METHOD(tidyNode, hasSiblings)
1517 {
1518 TIDY_FETCH_ONLY_OBJECT;
1519
1520 if (obj->node && tidyGetNext(obj->node)) {
1521 RETURN_TRUE;
1522 } else {
1523 RETURN_FALSE;
1524 }
1525 }
1526 /* }}} */
1527
1528 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1529 PHP_METHOD(tidyNode, isComment)
1530 {
1531 TIDY_FETCH_ONLY_OBJECT;
1532
1533 if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1534 RETURN_TRUE;
1535 } else {
1536 RETURN_FALSE;
1537 }
1538 }
1539 /* }}} */
1540
1541 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1542 PHP_METHOD(tidyNode, isHtml)
1543 {
1544 TIDY_FETCH_ONLY_OBJECT;
1545
1546 switch (tidyNodeGetType(obj->node)) {
1547 case TidyNode_Start:
1548 case TidyNode_End:
1549 case TidyNode_StartEnd:
1550 RETURN_TRUE;
1551 default:
1552 RETURN_FALSE;
1553 }
1554 }
1555 /* }}} */
1556
1557 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1558 PHP_METHOD(tidyNode, isText)
1559 {
1560 TIDY_FETCH_ONLY_OBJECT;
1561
1562 if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1563 RETURN_TRUE;
1564 } else {
1565 RETURN_FALSE;
1566 }
1567 }
1568 /* }}} */
1569
1570 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1571 PHP_METHOD(tidyNode, isJste)
1572 {
1573 TIDY_FETCH_ONLY_OBJECT;
1574
1575 if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1576 RETURN_TRUE;
1577 } else {
1578 RETURN_FALSE;
1579 }
1580 }
1581 /* }}} */
1582
1583 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1584 PHP_METHOD(tidyNode, isAsp)
1585 {
1586 TIDY_FETCH_ONLY_OBJECT;
1587
1588 if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1589 RETURN_TRUE;
1590 } else {
1591 RETURN_FALSE;
1592 }
1593 }
1594 /* }}} */
1595
1596 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1597 PHP_METHOD(tidyNode, isPhp)
1598 {
1599 TIDY_FETCH_ONLY_OBJECT;
1600
1601 if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1602 RETURN_TRUE;
1603 } else {
1604 RETURN_FALSE;
1605 }
1606 }
1607 /* }}} */
1608
1609 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1610 PHP_METHOD(tidyNode, getParent)
1611 {
1612 TidyNode parent_node;
1613 PHPTidyObj *newobj;
1614 TIDY_FETCH_ONLY_OBJECT;
1615
1616 parent_node = tidyGetParent(obj->node);
1617 if(parent_node) {
1618 tidy_instantiate(tidy_ce_node, return_value);
1619 newobj = Z_TIDY_P(return_value);
1620 newobj->node = parent_node;
1621 newobj->type = is_node;
1622 newobj->ptdoc = obj->ptdoc;
1623 newobj->ptdoc->ref_count++;
1624 tidy_add_node_default_properties(newobj);
1625 } else {
1626 ZVAL_NULL(return_value);
1627 }
1628 }
1629 /* }}} */
1630
1631
1632 /* {{{ __constructor for tidyNode. */
PHP_METHOD(tidyNode,__construct)1633 PHP_METHOD(tidyNode, __construct)
1634 {
1635 zend_throw_error(NULL, "You should not create a tidyNode manually");
1636 }
1637 /* }}} */
1638
1639 #endif
1640