1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: John Coggeshall <john@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20
21 #include "php.h"
22 #include "php_tidy.h"
23
24 #ifdef HAVE_TIDY
25
26 #include "php_ini.h"
27 #include "ext/standard/info.h"
28
29 #ifdef HAVE_TIDY_H
30 #include "tidy.h"
31 #elif defined(HAVE_TIDYP_H)
32 #include "tidyp.h"
33 #endif
34
35 #ifdef HAVE_TIDYBUFFIO_H
36 #include "tidybuffio.h"
37 #else
38 #include "buffio.h"
39 #endif
40
41 #include "tidy_arginfo.h"
42
43 #include "Zend/zend_exceptions.h"
44
45 /* compatibility with older versions of libtidy */
46 #ifndef TIDY_CALL
47 #define TIDY_CALL
48 #endif
49
50 /* {{{ ext/tidy macros */
/*
 * Replace the last byte of a non-empty TidyBuffer with a NUL terminator so
 * that bp can be consumed as a C string of length size-1. Empty buffers are
 * left untouched.
 */
#define FIX_BUFFER(bptr) do { if ((bptr)->size) { (bptr)->bp[(bptr)->size-1] = '\0'; } } while(0)

/* Declare `object` and point it at the zval returned by getThis(). */
#define TIDY_SET_CONTEXT \
	zval *object = getThis();

/*
 * Declare `obj` and `object`, parse exactly one tidy instance (an "O"
 * argument restricted to tidy_ce_doc, resolved through
 * zend_parse_method_parameters() with getThis()) and store the matching
 * PHPTidyObj in `obj`. Leaves the calling function via RETURN_THROWS() when
 * parsing fails.
 */
#define TIDY_FETCH_OBJECT \
	PHPTidyObj *obj; \
	zval *object; \
	if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "O", &object, tidy_ce_doc) != SUCCESS) { \
		RETURN_THROWS(); \
	} \
	obj = Z_TIDY_P(object);

/*
 * Same as TIDY_FETCH_OBJECT, but additionally throws an Error and leaves the
 * calling function when the document has not been marked as initialized.
 * RETURN_THROWS() is used (instead of a bare return) so this exit path is
 * written the same way as every other "exception pending" exit in this file.
 */
#define TIDY_FETCH_INITIALIZED_OBJECT \
	TIDY_FETCH_OBJECT; \
	if (!obj->ptdoc->initialized) { \
		zend_throw_error(NULL, "tidy object is not initialized"); \
		RETURN_THROWS(); \
	}

/*
 * For methods that take no arguments: declare `obj`, bind `object` to
 * getThis(), reject any passed arguments, and store the PHPTidyObj in `obj`.
 */
#define TIDY_FETCH_ONLY_OBJECT \
	PHPTidyObj *obj; \
	TIDY_SET_CONTEXT; \
	if (zend_parse_parameters_none() != SUCCESS) { \
		RETURN_THROWS(); \
	} \
	obj = Z_TIDY_P(object);

/* Load the tidy.default_config file into _doc when that INI value is a non-empty string. */
#define TIDY_SET_DEFAULT_CONFIG(_doc) \
	if (TG(default_config) && TG(default_config)[0]) { \
		php_tidy_load_config(_doc, TG(default_config)); \
	}
83 /* }}} */
84
85 /* {{{ ext/tidy structs */
86 typedef struct _PHPTidyDoc PHPTidyDoc;
87 typedef struct _PHPTidyObj PHPTidyObj;
88
89 typedef enum {
90 is_node,
91 is_doc
92 } tidy_obj_type;
93
94 typedef enum {
95 is_root_node,
96 is_html_node,
97 is_head_node,
98 is_body_node
99 } tidy_base_nodetypes;
100
101 struct _PHPTidyDoc {
102 TidyDoc doc;
103 TidyBuffer *errbuf;
104 unsigned int ref_count;
105 unsigned int initialized:1;
106 };
107
108 struct _PHPTidyObj {
109 TidyNode node;
110 tidy_obj_type type;
111 PHPTidyDoc *ptdoc;
112 zend_object std;
113 };
114
php_tidy_fetch_object(zend_object * obj)115 static inline PHPTidyObj *php_tidy_fetch_object(zend_object *obj) {
116 return (PHPTidyObj *)((char*)(obj) - XtOffsetOf(PHPTidyObj, std));
117 }
118
119 #define Z_TIDY_P(zv) php_tidy_fetch_object(Z_OBJ_P((zv)))
120 /* }}} */
121
122 /* {{{ ext/tidy prototypes */
123 static zend_string *php_tidy_file_to_mem(const char *, bool);
124 static void tidy_object_free_storage(zend_object *);
125 static zend_object *tidy_object_new_node(zend_class_entry *);
126 static zend_object *tidy_object_new_doc(zend_class_entry *);
127 static zval *tidy_instantiate(zend_class_entry *, zval *);
128 static zend_result tidy_doc_cast_handler(zend_object *, zval *, int);
129 static zend_result tidy_node_cast_handler(zend_object *, zval *, int);
130 static void tidy_doc_update_properties(PHPTidyObj *);
131 static void tidy_add_node_default_properties(PHPTidyObj *);
132 static void *php_tidy_get_opt_val(PHPTidyDoc *, TidyOption, TidyOptionType *);
133 static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes);
134 static int _php_tidy_set_tidy_opt(TidyDoc, const char *, zval *);
135 static int _php_tidy_apply_config_array(TidyDoc doc, const HashTable *ht_options);
136 static PHP_INI_MH(php_tidy_set_clean_output);
137 static void php_tidy_clean_output_start(const char *name, size_t name_len);
138 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags);
139 static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context);
140
141 static PHP_MINIT_FUNCTION(tidy);
142 static PHP_MSHUTDOWN_FUNCTION(tidy);
143 static PHP_RINIT_FUNCTION(tidy);
144 static PHP_RSHUTDOWN_FUNCTION(tidy);
145 static PHP_MINFO_FUNCTION(tidy);
146
147 ZEND_DECLARE_MODULE_GLOBALS(tidy)
148
149 PHP_INI_BEGIN()
150 STD_PHP_INI_ENTRY("tidy.default_config", "", PHP_INI_SYSTEM, OnUpdateString, default_config, zend_tidy_globals, tidy_globals)
151 STD_PHP_INI_BOOLEAN("tidy.clean_output", "0", PHP_INI_USER, php_tidy_set_clean_output, clean_output, zend_tidy_globals, tidy_globals)
152 PHP_INI_END()
153
154 static zend_class_entry *tidy_ce_doc, *tidy_ce_node;
155
156 static zend_object_handlers tidy_object_handlers_doc;
157 static zend_object_handlers tidy_object_handlers_node;
158
159 zend_module_entry tidy_module_entry = {
160 STANDARD_MODULE_HEADER,
161 "tidy",
162 ext_functions,
163 PHP_MINIT(tidy),
164 PHP_MSHUTDOWN(tidy),
165 PHP_RINIT(tidy),
166 PHP_RSHUTDOWN(tidy),
167 PHP_MINFO(tidy),
168 PHP_TIDY_VERSION,
169 PHP_MODULE_GLOBALS(tidy),
170 NULL,
171 NULL,
172 NULL,
173 STANDARD_MODULE_PROPERTIES_EX
174 };
175
176 #ifdef COMPILE_DL_TIDY
177 #ifdef ZTS
178 ZEND_TSRMLS_CACHE_DEFINE()
179 #endif
ZEND_GET_MODULE(tidy)180 ZEND_GET_MODULE(tidy)
181 #endif
182
183 static void* TIDY_CALL php_tidy_malloc(size_t len)
184 {
185 return emalloc(len);
186 }
187
php_tidy_realloc(void * buf,size_t len)188 static void* TIDY_CALL php_tidy_realloc(void *buf, size_t len)
189 {
190 return erealloc(buf, len);
191 }
192
php_tidy_free(void * buf)193 static void TIDY_CALL php_tidy_free(void *buf)
194 {
195 efree(buf);
196 }
197
php_tidy_panic(ctmbstr msg)198 static void TIDY_CALL php_tidy_panic(ctmbstr msg)
199 {
200 php_error_docref(NULL, E_ERROR, "Could not allocate memory for tidy! (Reason: %s)", (const char *)msg);
201 }
202
/*
 * Load a tidy configuration file into doc.
 *
 * A negative result from tidyLoadConfig() is reported as E_WARNING, a
 * positive one as E_NOTICE; zero is silent. Nothing is returned to the caller.
 */
static void php_tidy_load_config(TidyDoc doc, const char *path)
{
	const int status = tidyLoadConfig(doc, path);

	if (status == 0) {
		return;
	}

	if (status < 0) {
		php_error_docref(NULL, E_WARNING, "Could not load the Tidy configuration file \"%s\"", path);
	} else {
		php_error_docref(NULL, E_NOTICE, "There were errors while parsing the Tidy configuration file \"%s\"", path);
	}
}
212
php_tidy_apply_config(TidyDoc doc,const zend_string * str_string,const HashTable * ht_options)213 static zend_result php_tidy_apply_config(TidyDoc doc, const zend_string *str_string, const HashTable *ht_options)
214 {
215 if (ht_options) {
216 return _php_tidy_apply_config_array(doc, ht_options);
217 } else if (str_string) {
218 if (php_check_open_basedir(ZSTR_VAL(str_string))) {
219 return FAILURE;
220 }
221 php_tidy_load_config(doc, ZSTR_VAL(str_string));
222 }
223 return SUCCESS;
224 }
225
/*
 * Set one tidy option by name.
 *
 * The value is converted according to the option's declared type (string,
 * integer or boolean). Unknown options, read-only options and options of an
 * unrecognised type raise E_WARNING.
 *
 * Returns SUCCESS when libtidy accepted the value, FAILURE otherwise.
 */
static int _php_tidy_set_tidy_opt(TidyDoc doc, const char *optname, zval *value)
{
	TidyOption option = tidyGetOptionByName(doc, optname);
	bool accepted = false;

	if (!option) {
		php_error_docref(NULL, E_WARNING, "Unknown Tidy configuration option \"%s\"", optname);
		return FAILURE;
	}

	if (tidyOptIsReadOnly(option)) {
		php_error_docref(NULL, E_WARNING, "Attempting to set read-only option \"%s\"", optname);
		return FAILURE;
	}

	switch (tidyOptGetType(option)) {
		case TidyString: {
			zend_string *tmp_str;
			zend_string *text = zval_get_tmp_string(value, &tmp_str);

			if (tidyOptSetValue(doc, tidyOptGetId(option), ZSTR_VAL(text))) {
				accepted = true;
			}
			/* The temporary string is released whether or not libtidy took the value. */
			zend_tmp_string_release(tmp_str);
			break;
		}

		case TidyInteger: {
			zend_long number = zval_get_long(value);

			if (tidyOptSetInt(doc, tidyOptGetId(option), number)) {
				accepted = true;
			}
			break;
		}

		case TidyBoolean: {
			zend_long flag = zval_get_long(value);

			if (tidyOptSetBool(doc, tidyOptGetId(option), flag)) {
				accepted = true;
			}
			break;
		}

		default:
			php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
			break;
	}

	return accepted ? SUCCESS : FAILURE;
}
273
/*
 * Initialise zv as a tidyNode object wrapping `node` of document `ptdoc`.
 *
 * The new object takes a reference on ptdoc (ref_count is incremented) and
 * has its default properties populated before returning.
 */
static void tidy_create_node_object(zval *zv, PHPTidyDoc *ptdoc, TidyNode node)
{
	PHPTidyObj *wrapper;

	tidy_instantiate(tidy_ce_node, zv);

	wrapper = Z_TIDY_P(zv);
	wrapper->node = node;
	wrapper->type = is_node;
	wrapper->ptdoc = ptdoc;
	ptdoc->ref_count++;

	tidy_add_node_default_properties(wrapper);
}
284
/*
 * Shared implementation of tidy_repair_string() and tidy_repair_file().
 *
 * is_file == false: argument 1 is the markup itself.
 * is_file == true:  argument 1 is a path that is read into memory first; a
 *                   fourth boolean argument (use_include_path) is accepted.
 * In both modes the optional second argument is a configuration array or a
 * configuration file path, and the optional third is the encoding name.
 *
 * Sets return_value to the repaired markup, or to false when the file cannot
 * be read, the configuration cannot be applied, the encoding is rejected,
 * parsing fails or clean-and-repair fails. Throws a ValueError when the input
 * length does not fit in an unsigned int.
 *
 * A configuration or encoding failure now skips parsing altogether. Earlier,
 * the false set for those failures was overwritten by the result of the parse
 * that still followed, hiding the failure from the caller (the object based
 * API in tidy_parse_string()/tidy_parse_file() already returns false there).
 */
static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, bool is_file)
{
	char *enc = NULL;
	size_t enc_len = 0;
	TidyDoc doc;
	TidyBuffer *errbuf;
	zend_string *data, *arg1, *config_str = NULL;
	HashTable *config_ht = NULL;
	bool setup_ok = true;

	if (is_file) {
		bool use_include_path = 0;

		ZEND_PARSE_PARAMETERS_START(1, 4)
			Z_PARAM_PATH_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
			Z_PARAM_BOOL(use_include_path)
		ZEND_PARSE_PARAMETERS_END();

		if (!(data = php_tidy_file_to_mem(ZSTR_VAL(arg1), use_include_path))) {
			RETURN_FALSE;
		}
	} else {
		ZEND_PARSE_PARAMETERS_START(1, 3)
			Z_PARAM_STR(arg1)
			Z_PARAM_OPTIONAL
			Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(config_ht, config_str)
			Z_PARAM_STRING(enc, enc_len)
		ZEND_PARSE_PARAMETERS_END();

		/* Borrowed from the argument; only the is_file copy is owned here. */
		data = arg1;
	}

	/* The length is narrowed to uint32_t when the buffer is attached below. */
	if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(data))) {
		if (is_file) {
			zend_string_release_ex(data, false);
			zend_argument_value_error(1, "File content is too long");
		} else {
			zend_argument_value_error(1, "is too long");
		}
		RETURN_THROWS();
	}

	doc = tidyCreate();
	errbuf = emalloc(sizeof(TidyBuffer));
	tidyBufInit(errbuf);

	if (tidySetErrorBuffer(doc, errbuf) != 0) {
		tidyBufFree(errbuf);
		efree(errbuf);
		tidyRelease(doc);
		/* NOTE(review): E_ERROR is expected to abort the request here, so the
		 * code below should not run with the released document - confirm. */
		php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
	}

	tidyOptSetBool(doc, TidyForceOutput, yes);
	tidyOptSetBool(doc, TidyMark, no);

	TIDY_SET_DEFAULT_CONFIG(doc);

	if (php_tidy_apply_config(doc, config_str, config_ht) != SUCCESS) {
		setup_ok = false;
	} else if (enc_len && tidySetCharEncoding(doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		setup_ok = false;
	}

	if (!setup_ok) {
		RETVAL_FALSE;
	} else {
		TidyBuffer buf;

		/* Attach (not copy) the input; buf does not own the string memory. */
		tidyBufInit(&buf);
		tidyBufAttach(&buf, (byte *) ZSTR_VAL(data), (uint32_t)ZSTR_LEN(data));

		if (tidyParseBuffer(doc, &buf) < 0) {
			php_error_docref(NULL, E_WARNING, "%s", errbuf->bp);
			RETVAL_FALSE;
		} else if (tidyCleanAndRepair(doc) >= 0) {
			TidyBuffer output;
			tidyBufInit(&output);

			tidySaveBuffer(doc, &output);
			/* The last output byte is replaced by NUL and excluded from the result. */
			FIX_BUFFER(&output);
			RETVAL_STRINGL((const char *) output.bp, output.size ? output.size-1 : 0);
			tidyBufFree(&output);
		} else {
			RETVAL_FALSE;
		}
	}

	if (is_file) {
		zend_string_release_ex(data, 0);
	}

	tidyBufFree(errbuf);
	efree(errbuf);
	tidyRelease(doc);
}
386
php_tidy_file_to_mem(const char * filename,bool use_include_path)387 static zend_string *php_tidy_file_to_mem(const char *filename, bool use_include_path)
388 {
389 php_stream *stream;
390 zend_string *data = NULL;
391
392 if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0), NULL))) {
393 return NULL;
394 }
395 if ((data = php_stream_copy_to_mem(stream, PHP_STREAM_COPY_ALL, 0)) == NULL) {
396 data = ZSTR_EMPTY_ALLOC();
397 }
398 php_stream_close(stream);
399
400 return data;
401 }
402
/*
 * free_obj handler shared by the document and node classes.
 *
 * Destroys the standard object part, then drops this object's reference on
 * the shared PHPTidyDoc. The error buffer, the libtidy document and the
 * PHPTidyDoc itself are released when the last reference goes away.
 */
static void tidy_object_free_storage(zend_object *object)
{
	PHPTidyObj *intern = php_tidy_fetch_object(object);
	PHPTidyDoc *shared;

	zend_object_std_dtor(&intern->std);

	shared = intern->ptdoc;
	if (!shared) {
		return;
	}

	shared->ref_count--;
	if (shared->ref_count == 0) {
		tidyBufFree(shared->errbuf);
		efree(shared->errbuf);
		tidyRelease(shared->doc);
		efree(shared);
	}
}
420
tidy_object_new(zend_class_entry * class_type,zend_object_handlers * handlers,tidy_obj_type objtype)421 static zend_object *tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, tidy_obj_type objtype)
422 {
423 PHPTidyObj *intern;
424
425 intern = zend_object_alloc(sizeof(PHPTidyObj), class_type);
426 zend_object_std_init(&intern->std, class_type);
427 object_properties_init(&intern->std, class_type);
428
429 switch(objtype) {
430 case is_node:
431 break;
432
433 case is_doc:
434 intern->ptdoc = emalloc(sizeof(PHPTidyDoc));
435 intern->ptdoc->doc = tidyCreate();
436 intern->ptdoc->ref_count = 1;
437 intern->ptdoc->initialized = 0;
438 intern->ptdoc->errbuf = emalloc(sizeof(TidyBuffer));
439 tidyBufInit(intern->ptdoc->errbuf);
440
441 if (tidySetErrorBuffer(intern->ptdoc->doc, intern->ptdoc->errbuf) != 0) {
442 tidyBufFree(intern->ptdoc->errbuf);
443 efree(intern->ptdoc->errbuf);
444 tidyRelease(intern->ptdoc->doc);
445 efree(intern->ptdoc);
446 efree(intern);
447 php_error_docref(NULL, E_ERROR, "Could not set Tidy error buffer");
448 }
449
450 tidyOptSetBool(intern->ptdoc->doc, TidyForceOutput, yes);
451 tidyOptSetBool(intern->ptdoc->doc, TidyMark, no);
452
453 TIDY_SET_DEFAULT_CONFIG(intern->ptdoc->doc);
454 break;
455 }
456
457 intern->std.handlers = handlers;
458
459 return &intern->std;
460 }
461
tidy_object_new_node(zend_class_entry * class_type)462 static zend_object *tidy_object_new_node(zend_class_entry *class_type)
463 {
464 return tidy_object_new(class_type, &tidy_object_handlers_node, is_node);
465 }
466
tidy_object_new_doc(zend_class_entry * class_type)467 static zend_object *tidy_object_new_doc(zend_class_entry *class_type)
468 {
469 return tidy_object_new(class_type, &tidy_object_handlers_doc, is_doc);
470 }
471
/* Initialise `object` as a new instance of class `pce` and hand the same zval back. */
static zval *tidy_instantiate(zend_class_entry *pce, zval *object)
{
	zval *target = object;

	object_init_ex(target, pce);

	return target;
}
477
tidy_doc_cast_handler(zend_object * in,zval * out,int type)478 static zend_result tidy_doc_cast_handler(zend_object *in, zval *out, int type)
479 {
480 TidyBuffer output;
481 PHPTidyObj *obj;
482
483 switch (type) {
484 case IS_LONG:
485 case _IS_NUMBER:
486 ZVAL_LONG(out, 0);
487 break;
488
489 case IS_DOUBLE:
490 ZVAL_DOUBLE(out, 0);
491 break;
492
493 case _IS_BOOL:
494 ZVAL_TRUE(out);
495 break;
496
497 case IS_STRING:
498 obj = php_tidy_fetch_object(in);
499 tidyBufInit(&output);
500 tidySaveBuffer (obj->ptdoc->doc, &output);
501 if (output.size) {
502 ZVAL_STRINGL(out, (const char *) output.bp, output.size-1);
503 } else {
504 ZVAL_EMPTY_STRING(out);
505 }
506 tidyBufFree(&output);
507 break;
508
509 default:
510 return FAILURE;
511 }
512
513 return SUCCESS;
514 }
515
tidy_node_cast_handler(zend_object * in,zval * out,int type)516 static zend_result tidy_node_cast_handler(zend_object *in, zval *out, int type)
517 {
518 TidyBuffer buf;
519 PHPTidyObj *obj;
520
521 switch(type) {
522 case IS_LONG:
523 case _IS_NUMBER:
524 ZVAL_LONG(out, 0);
525 break;
526
527 case IS_DOUBLE:
528 ZVAL_DOUBLE(out, 0);
529 break;
530
531 case _IS_BOOL:
532 ZVAL_TRUE(out);
533 break;
534
535 case IS_STRING:
536 obj = php_tidy_fetch_object(in);
537 tidyBufInit(&buf);
538 if (obj->ptdoc) {
539 tidyNodeGetText(obj->ptdoc->doc, obj->node, &buf);
540 ZVAL_STRINGL(out, (const char *) buf.bp, buf.size-1);
541 } else {
542 ZVAL_EMPTY_STRING(out);
543 }
544 tidyBufFree(&buf);
545 break;
546
547 default:
548 return FAILURE;
549 }
550
551 return SUCCESS;
552 }
553
/*
 * Refresh the "value" and "errorBuffer" properties of a tidy document object.
 *
 * "value" receives the document as saved by tidySaveBuffer() and
 * "errorBuffer" the current error buffer contents, each without its final
 * byte. A property is left untouched when the corresponding buffer is empty.
 */
static void tidy_doc_update_properties(PHPTidyObj *obj)
{
	TidyBuffer rendered;
	TidyBuffer *errors;

	tidyBufInit(&rendered);
	tidySaveBuffer(obj->ptdoc->doc, &rendered);

	if (rendered.size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, ZEND_STRL("value"),
			(char*) rendered.bp, rendered.size-1);
	}

	tidyBufFree(&rendered);

	errors = obj->ptdoc->errbuf;
	if (errors->size) {
		zend_update_property_stringl(tidy_ce_doc, &obj->std, ZEND_STRL("errorBuffer"),
			(char*) errors->bp, errors->size-1);
	}
}
585
/*
 * Populate the properties of a tidyNode object from its libtidy node:
 *
 *   value        node text (empty string when libtidy returns none)
 *   name         node name (empty string when libtidy returns NULL)
 *   type, line, column, proprietary
 *   id           NULL for root, doctype, text and comment nodes, else the tag id
 *   attribute    name => value array, or NULL when the node has no attributes
 *   child        array of tidyNode objects, or NULL when there are no children
 *
 * Child objects are built with tidy_create_node_object(), which calls back
 * into this function, so the whole subtree is materialised.
 */
static void tidy_add_node_default_properties(PHPTidyObj *obj)
{
	zend_object *self = &obj->std;
	TidyBuffer text;
	TidyAttr attr;
	TidyNode child;
	zval attributes, children;
	const char *tag;

	/* value */
	tidyBufInit(&text);
	tidyNodeGetText(obj->ptdoc->doc, obj->node, &text);

	if (text.size) {
		zend_update_property_stringl(tidy_ce_node, self, ZEND_STRL("value"), (const char *) text.bp, text.size - 1);
	} else {
		zend_update_property_stringl(tidy_ce_node, self, ZEND_STRL("value"), "", 0);
	}

	tidyBufFree(&text);

	/* name */
	tag = (const char *) tidyNodeGetName(obj->node);
	if (!tag) {
		tag = "";
	}
	zend_update_property_string(tidy_ce_node, self, ZEND_STRL("name"), tag);

	/* scalar metadata */
	zend_update_property_long(tidy_ce_node, self, ZEND_STRL("type"), tidyNodeGetType(obj->node));
	zend_update_property_long(tidy_ce_node, self, ZEND_STRL("line"), tidyNodeLine(obj->node));
	zend_update_property_long(tidy_ce_node, self, ZEND_STRL("column"), tidyNodeColumn(obj->node));
	zend_update_property_bool(tidy_ce_node, self, ZEND_STRL("proprietary"), tidyNodeIsProp(obj->ptdoc->doc, obj->node));

	/* id: these four node types get NULL instead of a tag id */
	switch (tidyNodeGetType(obj->node)) {
		case TidyNode_Root:
		case TidyNode_DocType:
		case TidyNode_Text:
		case TidyNode_Comment:
			zend_update_property_null(tidy_ce_node, self, ZEND_STRL("id"));
			break;

		default:
			zend_update_property_long(tidy_ce_node, self, ZEND_STRL("id"), tidyNodeGetId(obj->node));
			break;
	}

	/* attribute */
	attr = tidyAttrFirst(obj->node);
	if (!attr) {
		ZVAL_NULL(&attributes);
	} else {
		array_init(&attributes);

		for (; attr; attr = tidyAttrNext(attr)) {
			const char *attr_name = (const char *)tidyAttrName(attr);
			const char *attr_value = (const char *)tidyAttrValue(attr);

			/* Attributes without a name are skipped. */
			if (!attr_name) {
				continue;
			}

			if (attr_value) {
				add_assoc_string(&attributes, attr_name, attr_value);
			} else {
				/* A missing value is stored as the empty string. */
				add_assoc_str(&attributes, attr_name, zend_empty_string);
			}
		}
	}

	zend_update_property(tidy_ce_node, self, ZEND_STRL("attribute"), &attributes);
	zval_ptr_dtor(&attributes);

	/* child */
	child = tidyGetChild(obj->node);
	if (!child) {
		ZVAL_NULL(&children);
	} else {
		array_init(&children);

		for (; child; child = tidyGetNext(child)) {
			zval child_zv;

			tidy_create_node_object(&child_zv, obj->ptdoc, child);
			add_next_index_zval(&children, &child_zv);
		}
	}

	zend_update_property(tidy_ce_node, self, ZEND_STRL("child"), &children);
	zval_ptr_dtor(&children);
}
727
/*
 * Fetch the current value of option `opt` and report its type through *type.
 *
 * The returned pointer must be interpreted according to *type:
 *   TidyString   a new zend_string (empty when the option has no value)
 *   TidyInteger  the integer value carried inside the pointer
 *   TidyBoolean  the boolean value carried inside the pointer
 * NULL is returned for any other type, which is not expected to occur.
 */
static void *php_tidy_get_opt_val(PHPTidyDoc *ptdoc, TidyOption opt, TidyOptionType *type)
{
	const TidyOptionType kind = tidyOptGetType(opt);

	*type = kind;

	if (kind == TidyString) {
		char *text = (char *) tidyOptGetValue(ptdoc->doc, tidyOptGetId(opt));

		if (!text) {
			return (void *) ZSTR_EMPTY_ALLOC();
		}
		return (void *) zend_string_init(text, strlen(text), 0);
	}

	if (kind == TidyInteger) {
		return (void *) (uintptr_t) tidyOptGetInt(ptdoc->doc, tidyOptGetId(opt));
	}

	if (kind == TidyBoolean) {
		return (void *) tidyOptGetBool(ptdoc->doc, tidyOptGetId(opt));
	}

	/* should not happen */
	return NULL;
}
755
/*
 * Shared implementation for the functions that return one of the document's
 * well-known nodes (root, html, head or body, selected by node_type).
 *
 * Sets return_value to a tidyNode object for that node, or to NULL when the
 * document has no such node.
 */
static void php_tidy_create_node(INTERNAL_FUNCTION_PARAMETERS, tidy_base_nodetypes node_type)
{
	TidyNode node;
	TidyDoc doc;
	TIDY_FETCH_OBJECT;

	doc = obj->ptdoc->doc;

	switch (node_type) {
		case is_root_node:
			node = tidyGetRoot(doc);
			break;

		case is_html_node:
			node = tidyGetHtml(doc);
			break;

		case is_head_node:
			node = tidyGetHead(doc);
			break;

		case is_body_node:
			node = tidyGetBody(doc);
			break;

		EMPTY_SWITCH_DEFAULT_CASE()
	}

	if (node) {
		tidy_create_node_object(return_value, obj->ptdoc, node);
		return;
	}

	RETURN_NULL();
}
787
/*
 * Apply every string-keyed entry of ht_options as a tidy option.
 *
 * Packed arrays are ignored entirely and entries without a string key are
 * skipped. Failures of individual options are not propagated: the function
 * always returns SUCCESS.
 */
static int _php_tidy_apply_config_array(TidyDoc doc, const HashTable *ht_options)
{
	zend_string *key;
	zval *setting;

	if (HT_IS_PACKED(ht_options)) {
		return SUCCESS;
	}

	ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(ht_options, key, setting) {
		if (key != NULL) {
			_php_tidy_set_tidy_opt(doc, ZSTR_VAL(key), setting);
		}
	} ZEND_HASH_FOREACH_END();

	return SUCCESS;
}
803
/*
 * Parse `len` bytes of markup from `string` into the document held by obj.
 *
 * When enc is non-NULL it is applied as the character encoding first; a
 * rejected encoding raises E_WARNING and fails before the document is marked
 * initialized. A parse error raises E_WARNING with the error buffer contents.
 * On success the object's properties are refreshed.
 *
 * Returns SUCCESS or FAILURE.
 */
static int php_tidy_parse_string(PHPTidyObj *obj, const char *string, uint32_t len, const char *enc)
{
	PHPTidyDoc *ptdoc = obj->ptdoc;
	TidyBuffer input;

	if (enc && tidySetCharEncoding(ptdoc->doc, enc) < 0) {
		php_error_docref(NULL, E_WARNING, "Could not set encoding \"%s\"", enc);
		return FAILURE;
	}

	/* Marked before parsing, so it stays set even when the parse below fails. */
	ptdoc->initialized = 1;

	tidyBufInit(&input);
	tidyBufAttach(&input, (byte *) string, len);

	if (tidyParseBuffer(ptdoc->doc, &input) < 0) {
		php_error_docref(NULL, E_WARNING, "%s", ptdoc->errbuf->bp);
		return FAILURE;
	}

	tidy_doc_update_properties(obj);

	return SUCCESS;
}
827
PHP_MINIT_FUNCTION(tidy)828 static PHP_MINIT_FUNCTION(tidy)
829 {
830 tidySetMallocCall(php_tidy_malloc);
831 tidySetReallocCall(php_tidy_realloc);
832 tidySetFreeCall(php_tidy_free);
833 tidySetPanicCall(php_tidy_panic);
834
835 REGISTER_INI_ENTRIES();
836
837 tidy_ce_doc = register_class_tidy();
838 tidy_ce_doc->create_object = tidy_object_new_doc;
839 memcpy(&tidy_object_handlers_doc, &std_object_handlers, sizeof(zend_object_handlers));
840 tidy_object_handlers_doc.clone_obj = NULL;
841
842 tidy_ce_node = register_class_tidyNode();
843 tidy_ce_node->create_object = tidy_object_new_node;
844 memcpy(&tidy_object_handlers_node, &std_object_handlers, sizeof(zend_object_handlers));
845 tidy_object_handlers_node.clone_obj = NULL;
846
847 tidy_object_handlers_doc.cast_object = tidy_doc_cast_handler;
848 tidy_object_handlers_node.cast_object = tidy_node_cast_handler;
849
850 tidy_object_handlers_node.offset = tidy_object_handlers_doc.offset = XtOffsetOf(PHPTidyObj, std);
851 tidy_object_handlers_node.free_obj = tidy_object_handlers_doc.free_obj = tidy_object_free_storage;
852
853 register_tidy_symbols(module_number);
854
855 php_output_handler_alias_register(ZEND_STRL("ob_tidyhandler"), php_tidy_output_handler_init);
856
857 return SUCCESS;
858 }
859
PHP_RINIT_FUNCTION(tidy)860 static PHP_RINIT_FUNCTION(tidy)
861 {
862 #if defined(COMPILE_DL_TIDY) && defined(ZTS)
863 ZEND_TSRMLS_CACHE_UPDATE();
864 #endif
865
866 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
867
868 return SUCCESS;
869 }
870
PHP_RSHUTDOWN_FUNCTION(tidy)871 static PHP_RSHUTDOWN_FUNCTION(tidy)
872 {
873 TG(clean_output) = INI_ORIG_BOOL("tidy.clean_output");
874
875 return SUCCESS;
876 }
877
PHP_MSHUTDOWN_FUNCTION(tidy)878 static PHP_MSHUTDOWN_FUNCTION(tidy)
879 {
880 UNREGISTER_INI_ENTRIES();
881 return SUCCESS;
882 }
883
/*
 * phpinfo() section: support flag, library version (the label and call depend
 * on which tidy headers were detected), optional release date, INI entries.
 */
static PHP_MINFO_FUNCTION(tidy)
{
	php_info_print_table_start();

	php_info_print_table_row(2, "Tidy support", "enabled");

#ifdef HAVE_TIDYBUFFIO_H
	php_info_print_table_row(2, "libTidy Version", (const char *)tidyLibraryVersion());
#elif defined(HAVE_TIDYP_H)
	php_info_print_table_row(2, "libtidyp Version", (const char *)tidyVersion());
#endif

#ifdef HAVE_TIDYRELEASEDATE
	php_info_print_table_row(2, "libTidy Release", (const char *)tidyReleaseDate());
#endif

	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
900
PHP_INI_MH(php_tidy_set_clean_output)901 static PHP_INI_MH(php_tidy_set_clean_output)
902 {
903 int status;
904 bool value;
905
906 value = zend_ini_parse_bool(new_value);
907
908 if (stage == PHP_INI_STAGE_RUNTIME) {
909 status = php_output_get_status();
910
911 if (value && (status & PHP_OUTPUT_WRITTEN)) {
912 php_error_docref(NULL, E_WARNING, "Cannot enable tidy.clean_output - there has already been output");
913 return FAILURE;
914 }
915 if (status & PHP_OUTPUT_SENT) {
916 php_error_docref(NULL, E_WARNING, "Cannot change tidy.clean_output - headers already sent");
917 return FAILURE;
918 }
919 }
920
921 status = OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
922
923 if (stage == PHP_INI_STAGE_RUNTIME && value) {
924 if (!php_output_handler_started(ZEND_STRL("ob_tidyhandler"))) {
925 php_tidy_clean_output_start(ZEND_STRL("ob_tidyhandler"));
926 }
927 }
928
929 return status;
930 }
931
932 /*
933 * NOTE: tidy does not support iterative/cumulative parsing, so chunk-sized output handler is not possible
934 */
935
/*
 * Start the tidy output handler under the given name, but only when
 * tidy.clean_output is enabled and the handler could be created.
 */
static void php_tidy_clean_output_start(const char *name, size_t name_len)
{
	php_output_handler *handler;

	if (!TG(clean_output)) {
		return;
	}

	/* Chunk size 0: the handler refuses any other value. */
	handler = php_tidy_output_handler_init(name, name_len, 0, PHP_OUTPUT_HANDLER_STDFLAGS);
	if (handler) {
		php_output_handler_start(handler);
	}
}
944
php_tidy_output_handler_init(const char * handler_name,size_t handler_name_len,size_t chunk_size,int flags)945 static php_output_handler *php_tidy_output_handler_init(const char *handler_name, size_t handler_name_len, size_t chunk_size, int flags)
946 {
947 if (chunk_size) {
948 php_error_docref(NULL, E_WARNING, "Cannot use a chunk size for ob_tidyhandler");
949 return NULL;
950 }
951 if (!TG(clean_output)) {
952 TG(clean_output) = 1;
953 }
954 return php_output_handler_create_internal(handler_name, handler_name_len, php_tidy_output_handler, chunk_size, flags);
955 }
956
php_tidy_output_handler(void ** nothing,php_output_context * output_context)957 static zend_result php_tidy_output_handler(void **nothing, php_output_context *output_context)
958 {
959 zend_result status = FAILURE;
960 TidyDoc doc;
961 TidyBuffer inbuf, outbuf, errbuf;
962
963 if (TG(clean_output) && (output_context->op & PHP_OUTPUT_HANDLER_START) && (output_context->op & PHP_OUTPUT_HANDLER_FINAL)) {
964 doc = tidyCreate();
965 tidyBufInit(&errbuf);
966
967 if (0 == tidySetErrorBuffer(doc, &errbuf)) {
968 tidyOptSetBool(doc, TidyForceOutput, yes);
969 tidyOptSetBool(doc, TidyMark, no);
970
971 if (ZEND_SIZE_T_UINT_OVFL(output_context->in.used)) {
972 php_error_docref(NULL, E_WARNING, "File content is too long");
973 return status;
974 }
975
976 TIDY_SET_DEFAULT_CONFIG(doc);
977
978 tidyBufInit(&inbuf);
979 tidyBufAttach(&inbuf, (byte *) output_context->in.data, (uint32_t)output_context->in.used);
980
981 if (0 <= tidyParseBuffer(doc, &inbuf) && 0 <= tidyCleanAndRepair(doc)) {
982 tidyBufInit(&outbuf);
983 tidySaveBuffer(doc, &outbuf);
984 FIX_BUFFER(&outbuf);
985 output_context->out.data = (char *) outbuf.bp;
986 output_context->out.used = outbuf.size ? outbuf.size-1 : 0;
987 output_context->out.free = 1;
988 status = SUCCESS;
989 }
990 }
991
992 tidyRelease(doc);
993 tidyBufFree(&errbuf);
994 }
995
996 return status;
997 }
998
999 /* {{{ Parse a document stored in a string */
PHP_FUNCTION(tidy_parse_string)1000 PHP_FUNCTION(tidy_parse_string)
1001 {
1002 char *enc = NULL;
1003 size_t enc_len = 0;
1004 zend_string *input, *options_str = NULL;
1005 HashTable *options_ht = NULL;
1006 PHPTidyObj *obj;
1007
1008 ZEND_PARSE_PARAMETERS_START(1, 3)
1009 Z_PARAM_STR(input)
1010 Z_PARAM_OPTIONAL
1011 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1012 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1013 ZEND_PARSE_PARAMETERS_END();
1014
1015 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1016 zend_argument_value_error(1, "is too long");
1017 RETURN_THROWS();
1018 }
1019
1020 tidy_instantiate(tidy_ce_doc, return_value);
1021 obj = Z_TIDY_P(return_value);
1022
1023 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1024 || php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) != SUCCESS) {
1025 zval_ptr_dtor(return_value);
1026 RETURN_FALSE;
1027 }
1028 }
1029 /* }}} */
1030
1031 /* {{{ Return warnings and errors which occurred parsing the specified document*/
PHP_FUNCTION(tidy_get_error_buffer)1032 PHP_FUNCTION(tidy_get_error_buffer)
1033 {
1034 TIDY_FETCH_OBJECT;
1035
1036 if (obj->ptdoc->errbuf && obj->ptdoc->errbuf->bp) {
1037 RETURN_STRINGL((const char*)obj->ptdoc->errbuf->bp, obj->ptdoc->errbuf->size-1);
1038 } else {
1039 RETURN_FALSE;
1040 }
1041 }
1042 /* }}} */
1043
1044 /* {{{ Return a string representing the parsed tidy markup */
PHP_FUNCTION(tidy_get_output)1045 PHP_FUNCTION(tidy_get_output)
1046 {
1047 TidyBuffer output;
1048 TIDY_FETCH_OBJECT;
1049
1050 tidyBufInit(&output);
1051 tidySaveBuffer(obj->ptdoc->doc, &output);
1052 FIX_BUFFER(&output);
1053 RETVAL_STRINGL((const char *) output.bp, output.size ? output.size-1 : 0);
1054 tidyBufFree(&output);
1055 }
1056 /* }}} */
1057
1058 /* {{{ Parse markup in file or URI */
PHP_FUNCTION(tidy_parse_file)1059 PHP_FUNCTION(tidy_parse_file)
1060 {
1061 char *enc = NULL;
1062 size_t enc_len = 0;
1063 bool use_include_path = 0;
1064 zend_string *inputfile, *contents, *options_str = NULL;
1065 HashTable *options_ht = NULL;
1066
1067 PHPTidyObj *obj;
1068
1069 ZEND_PARSE_PARAMETERS_START(1, 4)
1070 Z_PARAM_PATH_STR(inputfile)
1071 Z_PARAM_OPTIONAL
1072 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1073 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1074 Z_PARAM_BOOL(use_include_path)
1075 ZEND_PARSE_PARAMETERS_END();
1076
1077 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1078 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1079 RETURN_FALSE;
1080 }
1081
1082 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1083 zend_string_release_ex(contents, 0);
1084 zend_value_error("File content is too long");
1085 RETURN_THROWS();
1086 }
1087
1088 tidy_instantiate(tidy_ce_doc, return_value);
1089 obj = Z_TIDY_P(return_value);
1090
1091 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS
1092 || php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) != SUCCESS) {
1093 zval_ptr_dtor(return_value);
1094 RETVAL_FALSE;
1095 }
1096
1097 zend_string_release_ex(contents, 0);
1098 }
1099 /* }}} */
1100
1101 /* {{{ Execute configured cleanup and repair operations on parsed markup */
PHP_FUNCTION(tidy_clean_repair)1102 PHP_FUNCTION(tidy_clean_repair)
1103 {
1104 TIDY_FETCH_OBJECT;
1105
1106 if (tidyCleanAndRepair(obj->ptdoc->doc) >= 0) {
1107 tidy_doc_update_properties(obj);
1108 RETURN_TRUE;
1109 }
1110
1111 RETURN_FALSE;
1112 }
1113 /* }}} */
1114
1115 /* {{{ Repair a string using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_string)1116 PHP_FUNCTION(tidy_repair_string)
1117 {
1118 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, false);
1119 }
1120 /* }}} */
1121
1122 /* {{{ Repair a file using an optionally provided configuration file */
PHP_FUNCTION(tidy_repair_file)1123 PHP_FUNCTION(tidy_repair_file)
1124 {
1125 php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, true);
1126 }
1127 /* }}} */
1128
1129 /* {{{ Run configured diagnostics on parsed and repaired markup. */
PHP_FUNCTION(tidy_diagnose)1130 PHP_FUNCTION(tidy_diagnose)
1131 {
1132 TIDY_FETCH_OBJECT;
1133
1134 if (obj->ptdoc->initialized && tidyRunDiagnostics(obj->ptdoc->doc) >= 0) {
1135 tidy_doc_update_properties(obj);
1136 RETURN_TRUE;
1137 }
1138
1139 RETURN_FALSE;
1140 }
1141 /* }}} */
1142
1143 /* {{{ Get release date (version) for Tidy library */
PHP_FUNCTION(tidy_get_release)1144 PHP_FUNCTION(tidy_get_release)
1145 {
1146 if (zend_parse_parameters_none() != SUCCESS) {
1147 RETURN_THROWS();
1148 }
1149
1150 #ifdef HAVE_TIDYRELEASEDATE
1151 RETURN_STRING((const char *)tidyReleaseDate());
1152 #else
1153 RETURN_STRING((const char *)"unknown");
1154 #endif
1155 }
1156 /* }}} */
1157
1158
1159 #ifdef HAVE_TIDYOPTGETDOC
1160 /* {{{ Returns the documentation for the given option name */
PHP_FUNCTION(tidy_get_opt_doc)1161 PHP_FUNCTION(tidy_get_opt_doc)
1162 {
1163 PHPTidyObj *obj;
1164 const char *optval;
1165 char *optname;
1166 size_t optname_len;
1167 TidyOption opt;
1168 zval *object;
1169
1170 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) != SUCCESS) {
1171 RETURN_THROWS();
1172 }
1173
1174 obj = Z_TIDY_P(object);
1175
1176 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1177
1178 if (!opt) {
1179 zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1180 RETURN_THROWS();
1181 }
1182
1183 if ( (optval = (const char *) tidyOptGetDoc(obj->ptdoc->doc, opt)) ) {
1184 RETURN_STRING(optval);
1185 }
1186
1187 RETURN_FALSE;
1188 }
1189 /* }}} */
1190 #endif
1191
1192
1193 /* {{{ Get current Tidy configuration */
PHP_FUNCTION(tidy_get_config)1194 PHP_FUNCTION(tidy_get_config)
1195 {
1196 TidyIterator itOpt;
1197 const char *opt_name;
1198 void *opt_value;
1199 TidyOptionType optt;
1200
1201 TIDY_FETCH_OBJECT;
1202
1203 itOpt = tidyGetOptionList(obj->ptdoc->doc);
1204
1205 array_init(return_value);
1206
1207 while (itOpt) {
1208 TidyOption opt = tidyGetNextOption(obj->ptdoc->doc, &itOpt);
1209
1210 opt_name = (const char *)tidyOptGetName(opt);
1211 opt_value = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1212 switch (optt) {
1213 case TidyString:
1214 add_assoc_str(return_value, opt_name, (zend_string*)opt_value);
1215 break;
1216
1217 case TidyInteger:
1218 add_assoc_long(return_value, opt_name, (zend_long)opt_value);
1219 break;
1220
1221 case TidyBoolean:
1222 add_assoc_bool(return_value, opt_name, opt_value ? 1 : 0);
1223 break;
1224 }
1225 }
1226 }
1227 /* }}} */
1228
1229 /* {{{ Get status of specified document. */
PHP_FUNCTION(tidy_get_status)1230 PHP_FUNCTION(tidy_get_status)
1231 {
1232 TIDY_FETCH_OBJECT;
1233
1234 RETURN_LONG(tidyStatus(obj->ptdoc->doc));
1235 }
1236 /* }}} */
1237
1238 /* {{{ Get the Detected HTML version for the specified document. */
PHP_FUNCTION(tidy_get_html_ver)1239 PHP_FUNCTION(tidy_get_html_ver)
1240 {
1241 TIDY_FETCH_INITIALIZED_OBJECT;
1242
1243 RETURN_LONG(tidyDetectedHtmlVersion(obj->ptdoc->doc));
1244 }
1245 /* }}} */
1246
1247 /* {{{ Indicates if the document is a XHTML document. */
PHP_FUNCTION(tidy_is_xhtml)1248 PHP_FUNCTION(tidy_is_xhtml)
1249 {
1250 TIDY_FETCH_INITIALIZED_OBJECT;
1251
1252 RETURN_BOOL(tidyDetectedXhtml(obj->ptdoc->doc));
1253 }
1254 /* }}} */
1255
1256 /* {{{ Indicates if the document is a generic (non HTML/XHTML) XML document. */
PHP_FUNCTION(tidy_is_xml)1257 PHP_FUNCTION(tidy_is_xml)
1258 {
1259 TIDY_FETCH_INITIALIZED_OBJECT;
1260
1261 RETURN_BOOL(tidyDetectedGenericXml(obj->ptdoc->doc));
1262 }
1263 /* }}} */
1264
1265 /* {{{ Returns the Number of Tidy errors encountered for specified document. */
PHP_FUNCTION(tidy_error_count)1266 PHP_FUNCTION(tidy_error_count)
1267 {
1268 TIDY_FETCH_OBJECT;
1269
1270 RETURN_LONG(tidyErrorCount(obj->ptdoc->doc));
1271 }
1272 /* }}} */
1273
1274 /* {{{ Returns the Number of Tidy warnings encountered for specified document. */
PHP_FUNCTION(tidy_warning_count)1275 PHP_FUNCTION(tidy_warning_count)
1276 {
1277 TIDY_FETCH_OBJECT;
1278
1279 RETURN_LONG(tidyWarningCount(obj->ptdoc->doc));
1280 }
1281 /* }}} */
1282
1283 /* {{{ Returns the Number of Tidy accessibility warnings encountered for specified document. */
PHP_FUNCTION(tidy_access_count)1284 PHP_FUNCTION(tidy_access_count)
1285 {
1286 TIDY_FETCH_OBJECT;
1287
1288 RETURN_LONG(tidyAccessWarningCount(obj->ptdoc->doc));
1289 }
1290 /* }}} */
1291
1292 /* {{{ Returns the Number of Tidy configuration errors encountered for specified document. */
PHP_FUNCTION(tidy_config_count)1293 PHP_FUNCTION(tidy_config_count)
1294 {
1295 TIDY_FETCH_OBJECT;
1296
1297 RETURN_LONG(tidyConfigErrorCount(obj->ptdoc->doc));
1298 }
1299 /* }}} */
1300
1301 /* {{{ Returns the value of the specified configuration option for the tidy document. */
PHP_FUNCTION(tidy_getopt)1302 PHP_FUNCTION(tidy_getopt)
1303 {
1304 PHPTidyObj *obj;
1305 char *optname;
1306 void *optval;
1307 size_t optname_len;
1308 TidyOption opt;
1309 TidyOptionType optt;
1310 zval *object;
1311
1312 if (zend_parse_method_parameters(ZEND_NUM_ARGS(), getThis(), "Os", &object, tidy_ce_doc, &optname, &optname_len) != SUCCESS) {
1313 RETURN_THROWS();
1314 }
1315
1316 obj = Z_TIDY_P(object);
1317
1318 opt = tidyGetOptionByName(obj->ptdoc->doc, optname);
1319
1320 if (!opt) {
1321 zend_argument_value_error(hasThis() ? 1 : 2, "is an invalid configuration option, \"%s\" given", optname);
1322 RETURN_THROWS();
1323 }
1324
1325 optval = php_tidy_get_opt_val(obj->ptdoc, opt, &optt);
1326 switch (optt) {
1327 case TidyString:
1328 RETVAL_STR((zend_string*)optval);
1329 return;
1330
1331 case TidyInteger:
1332 RETURN_LONG((zend_long)optval);
1333 break;
1334
1335 case TidyBoolean:
1336 if (optval) {
1337 RETURN_TRUE;
1338 } else {
1339 RETURN_FALSE;
1340 }
1341 break;
1342
1343 default:
1344 php_error_docref(NULL, E_WARNING, "Unable to determine type of configuration option");
1345 break;
1346 }
1347
1348 RETURN_FALSE;
1349 }
1350 /* }}} */
1351
PHP_METHOD(tidy,__construct)1352 PHP_METHOD(tidy, __construct)
1353 {
1354 char *enc = NULL;
1355 size_t enc_len = 0;
1356 bool use_include_path = 0;
1357 HashTable *options_ht = NULL;
1358 zend_string *contents, *inputfile = NULL, *options_str = NULL;
1359 PHPTidyObj *obj;
1360
1361 ZEND_PARSE_PARAMETERS_START(0, 4)
1362 Z_PARAM_OPTIONAL
1363 Z_PARAM_PATH_STR_OR_NULL(inputfile)
1364 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1365 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1366 Z_PARAM_BOOL(use_include_path)
1367 ZEND_PARSE_PARAMETERS_END();
1368
1369 TIDY_SET_CONTEXT;
1370 obj = Z_TIDY_P(object);
1371
1372 if (inputfile) {
1373 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1374 zend_throw_error(zend_ce_exception, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1375 RETURN_THROWS();
1376 }
1377
1378 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1379 zend_string_release_ex(contents, 0);
1380 zend_value_error("File content is too long");
1381 RETURN_THROWS();
1382 }
1383
1384 zend_error_handling error_handling;
1385 zend_replace_error_handling(EH_THROW, NULL, &error_handling);
1386 if (php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) != SUCCESS) {
1387 zend_restore_error_handling(&error_handling);
1388 zend_string_release_ex(contents, 0);
1389 RETURN_THROWS();
1390 }
1391 zend_restore_error_handling(&error_handling);
1392
1393 php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc);
1394
1395 zend_string_release_ex(contents, 0);
1396 }
1397 }
1398
PHP_METHOD(tidy,parseFile)1399 PHP_METHOD(tidy, parseFile)
1400 {
1401 char *enc = NULL;
1402 size_t enc_len = 0;
1403 bool use_include_path = 0;
1404 HashTable *options_ht = NULL;
1405 zend_string *inputfile, *contents, *options_str = NULL;
1406 PHPTidyObj *obj;
1407
1408 ZEND_PARSE_PARAMETERS_START(1, 4)
1409 Z_PARAM_PATH_STR(inputfile)
1410 Z_PARAM_OPTIONAL
1411 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1412 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1413 Z_PARAM_BOOL(use_include_path)
1414 ZEND_PARSE_PARAMETERS_END();
1415
1416 TIDY_SET_CONTEXT;
1417 obj = Z_TIDY_P(object);
1418
1419 if (!(contents = php_tidy_file_to_mem(ZSTR_VAL(inputfile), use_include_path))) {
1420 php_error_docref(NULL, E_WARNING, "Cannot load \"%s\" into memory%s", ZSTR_VAL(inputfile), (use_include_path) ? " (using include path)" : "");
1421 RETURN_FALSE;
1422 }
1423
1424 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(contents))) {
1425 zend_string_release_ex(contents, 0);
1426 zend_value_error("File content is too long");
1427 RETURN_THROWS();
1428 }
1429
1430 RETVAL_BOOL(php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) == SUCCESS
1431 && php_tidy_parse_string(obj, ZSTR_VAL(contents), (uint32_t)ZSTR_LEN(contents), enc) == SUCCESS);
1432
1433 zend_string_release_ex(contents, 0);
1434 }
1435
PHP_METHOD(tidy,parseString)1436 PHP_METHOD(tidy, parseString)
1437 {
1438 char *enc = NULL;
1439 size_t enc_len = 0;
1440 HashTable *options_ht = NULL;
1441 PHPTidyObj *obj;
1442 zend_string *input, *options_str = NULL;
1443
1444 ZEND_PARSE_PARAMETERS_START(1, 3)
1445 Z_PARAM_STR(input)
1446 Z_PARAM_OPTIONAL
1447 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(options_ht, options_str)
1448 Z_PARAM_STRING_OR_NULL(enc, enc_len)
1449 ZEND_PARSE_PARAMETERS_END();
1450
1451 if (ZEND_SIZE_T_UINT_OVFL(ZSTR_LEN(input))) {
1452 zend_argument_value_error(1, "is too long");
1453 RETURN_THROWS();
1454 }
1455
1456 TIDY_SET_CONTEXT;
1457 obj = Z_TIDY_P(object);
1458
1459 RETURN_BOOL(php_tidy_apply_config(obj->ptdoc->doc, options_str, options_ht) == SUCCESS
1460 && php_tidy_parse_string(obj, ZSTR_VAL(input), (uint32_t)ZSTR_LEN(input), enc) == SUCCESS);
1461 }
1462
1463
1464 /* {{{ Returns a TidyNode Object representing the root of the tidy parse tree */
/*
 * Thin wrapper: forwards this call's arguments and return_value to
 * php_tidy_create_node() with the is_root_node selector.
 */
PHP_FUNCTION(tidy_get_root)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_root_node);
}
1469 /* }}} */
1470
1471 /* {{{ Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
/*
 * Thin wrapper: forwards this call's arguments and return_value to
 * php_tidy_create_node() with the is_html_node selector.
 */
PHP_FUNCTION(tidy_get_html)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_html_node);
}
1476 /* }}} */
1477
1478 /* {{{ Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
/*
 * Thin wrapper: forwards this call's arguments and return_value to
 * php_tidy_create_node() with the is_head_node selector.
 */
PHP_FUNCTION(tidy_get_head)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_head_node);
}
1483 /* }}} */
1484
1485 /* {{{ Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
/*
 * Thin wrapper: forwards this call's arguments and return_value to
 * php_tidy_create_node() with the is_body_node selector.
 */
PHP_FUNCTION(tidy_get_body)
{
	php_tidy_create_node(INTERNAL_FUNCTION_PARAM_PASSTHRU, is_body_node);
}
1490 /* }}} */
1491
1492 /* {{{ Returns true if this node has children */
PHP_METHOD(tidyNode,hasChildren)1493 PHP_METHOD(tidyNode, hasChildren)
1494 {
1495 TIDY_FETCH_ONLY_OBJECT;
1496
1497 if (tidyGetChild(obj->node)) {
1498 RETURN_TRUE;
1499 } else {
1500 RETURN_FALSE;
1501 }
1502 }
1503 /* }}} */
1504
1505 /* {{{ Returns true if this node has siblings */
PHP_METHOD(tidyNode,hasSiblings)1506 PHP_METHOD(tidyNode, hasSiblings)
1507 {
1508 TIDY_FETCH_ONLY_OBJECT;
1509
1510 if (obj->node && tidyGetNext(obj->node)) {
1511 RETURN_TRUE;
1512 } else {
1513 RETURN_FALSE;
1514 }
1515 }
1516 /* }}} */
1517
1518 /* {{{ Returns true if this node represents a comment */
PHP_METHOD(tidyNode,isComment)1519 PHP_METHOD(tidyNode, isComment)
1520 {
1521 TIDY_FETCH_ONLY_OBJECT;
1522
1523 if (tidyNodeGetType(obj->node) == TidyNode_Comment) {
1524 RETURN_TRUE;
1525 } else {
1526 RETURN_FALSE;
1527 }
1528 }
1529 /* }}} */
1530
1531 /* {{{ Returns true if this node is part of a HTML document */
PHP_METHOD(tidyNode,isHtml)1532 PHP_METHOD(tidyNode, isHtml)
1533 {
1534 TIDY_FETCH_ONLY_OBJECT;
1535
1536 switch (tidyNodeGetType(obj->node)) {
1537 case TidyNode_Start:
1538 case TidyNode_End:
1539 case TidyNode_StartEnd:
1540 RETURN_TRUE;
1541 default:
1542 RETURN_FALSE;
1543 }
1544 }
1545 /* }}} */
1546
1547 /* {{{ Returns true if this node represents text (no markup) */
PHP_METHOD(tidyNode,isText)1548 PHP_METHOD(tidyNode, isText)
1549 {
1550 TIDY_FETCH_ONLY_OBJECT;
1551
1552 if (tidyNodeGetType(obj->node) == TidyNode_Text) {
1553 RETURN_TRUE;
1554 } else {
1555 RETURN_FALSE;
1556 }
1557 }
1558 /* }}} */
1559
1560 /* {{{ Returns true if this node is JSTE */
PHP_METHOD(tidyNode,isJste)1561 PHP_METHOD(tidyNode, isJste)
1562 {
1563 TIDY_FETCH_ONLY_OBJECT;
1564
1565 if (tidyNodeGetType(obj->node) == TidyNode_Jste) {
1566 RETURN_TRUE;
1567 } else {
1568 RETURN_FALSE;
1569 }
1570 }
1571 /* }}} */
1572
1573 /* {{{ Returns true if this node is ASP */
PHP_METHOD(tidyNode,isAsp)1574 PHP_METHOD(tidyNode, isAsp)
1575 {
1576 TIDY_FETCH_ONLY_OBJECT;
1577
1578 if (tidyNodeGetType(obj->node) == TidyNode_Asp) {
1579 RETURN_TRUE;
1580 } else {
1581 RETURN_FALSE;
1582 }
1583 }
1584 /* }}} */
1585
1586 /* {{{ Returns true if this node is PHP */
PHP_METHOD(tidyNode,isPhp)1587 PHP_METHOD(tidyNode, isPhp)
1588 {
1589 TIDY_FETCH_ONLY_OBJECT;
1590
1591 if (tidyNodeGetType(obj->node) == TidyNode_Php) {
1592 RETURN_TRUE;
1593 } else {
1594 RETURN_FALSE;
1595 }
1596 }
1597 /* }}} */
1598
1599 /* {{{ Returns the parent node if available or NULL */
PHP_METHOD(tidyNode,getParent)1600 PHP_METHOD(tidyNode, getParent)
1601 {
1602 TIDY_FETCH_ONLY_OBJECT;
1603
1604 TidyNode parent_node = tidyGetParent(obj->node);
1605 if (parent_node) {
1606 tidy_create_node_object(return_value, obj->ptdoc, parent_node);
1607 }
1608 }
1609 /* }}} */
1610
PHP_METHOD(tidyNode,getPreviousSibling)1611 PHP_METHOD(tidyNode, getPreviousSibling)
1612 {
1613 TIDY_FETCH_ONLY_OBJECT;
1614
1615 TidyNode previous_node = tidyGetPrev(obj->node);
1616 if (previous_node) {
1617 tidy_create_node_object(return_value, obj->ptdoc, previous_node);
1618 }
1619 }
1620
PHP_METHOD(tidyNode,getNextSibling)1621 PHP_METHOD(tidyNode, getNextSibling)
1622 {
1623 TIDY_FETCH_ONLY_OBJECT;
1624
1625 TidyNode next_node = tidyGetNext(obj->node);
1626 if (next_node) {
1627 tidy_create_node_object(return_value, obj->ptdoc, next_node);
1628 }
1629 }
1630
1631 /* {{{ __constructor for tidyNode. */
PHP_METHOD(tidyNode,__construct)1632 PHP_METHOD(tidyNode, __construct)
1633 {
1634 zend_throw_error(NULL, "You should not create a tidyNode manually");
1635 }
1636 /* }}} */
1637
1638 #endif
1639