1 /* 2 +----------------------------------------------------------------------+ 3 | Copyright (c) The PHP Group | 4 +----------------------------------------------------------------------+ 5 | This source file is subject to version 3.01 of the PHP license, | 6 | that is bundled with this package in the file LICENSE, and is | 7 | available through the world-wide-web at the following url: | 8 | https://www.php.net/license/3_01.txt | 9 | If you did not receive a copy of the PHP license and are unable to | 10 | obtain it through the world-wide-web, please send a note to | 11 | license@php.net so we can mail you a copy immediately. | 12 +----------------------------------------------------------------------+ 13 | Authors: Niels Dossche <nielsdos@php.net> | 14 +----------------------------------------------------------------------+ 15 */ 16 17 #ifndef HTML5_PARSER_H 18 #define HTML5_PARSER_H 19 20 #include "namespace_compat.h" 21 #include <lexbor/html/parser.h> 22 #include <libxml/tree.h> 23 #include <Zend/zend_portability.h> 24 25 typedef enum { 26 LEXBOR_LIBXML2_BRIDGE_STATUS_OK = 0, 27 LEXBOR_LIBXML2_BRIDGE_STATUS_CANNOT_INIT, 28 LEXBOR_LIBXML2_BRIDGE_STATUS_FATAL_PARSE, 29 LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW, 30 LEXBOR_LIBXML2_BRIDGE_STATUS_OOM, 31 } lexbor_libxml2_bridge_status; 32 33 typedef void (*lexbor_libxml2_bridge_tokenizer_error_reporter)( 34 void *application_data, 35 lxb_html_tokenizer_error_t *error, 36 size_t offset 37 ); 38 typedef void (*lexbor_libxml2_bridge_tree_error_reporter)( 39 void *application_data, 40 lxb_html_tree_error_t *error, 41 size_t line, 42 size_t column, 43 size_t len 44 ); 45 46 typedef struct lexbor_libxml2_bridge_extracted_observations { 47 bool has_explicit_html_tag; 48 bool has_explicit_head_tag; 49 bool has_explicit_body_tag; 50 php_libxml_quirks_mode quirks_mode; 51 } lexbor_libxml2_bridge_extracted_observations; 52 53 typedef struct lexbor_libxml2_bridge_parse_context { 54 /* Private fields */ 55 lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter; 56 lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter; 57 /* Public fields */ 58 lexbor_libxml2_bridge_extracted_observations observations; 59 /* Application data, do what you want with this */ 60 void *application_data; 61 } lexbor_libxml2_bridge_parse_context; 62 63 void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx); 64 void lexbor_libxml2_bridge_parse_set_error_callbacks( 65 lexbor_libxml2_bridge_parse_context *ctx, 66 lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter, 67 lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter 68 ); 69 lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document( 70 lxb_html_document_t *document, 71 xmlDocPtr *doc_out, 72 bool compact_text_nodes, 73 bool create_default_ns, 74 php_dom_private_data *private_data 75 ); 76 lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment( 77 lxb_dom_node_t *start_node, 78 xmlDocPtr lxml_doc, 79 xmlNodePtr *fragment_out, 80 bool compact_text_nodes, 81 bool create_default_ns, 82 php_dom_private_data *private_data 83 ); 84 void lexbor_libxml2_bridge_report_errors( 85 const lexbor_libxml2_bridge_parse_context *ctx, 86 lxb_html_parser_t *parser, 87 const lxb_char_t *input_html, 88 size_t chunk_offset, 89 size_t *error_index_offset_tokenizer, 90 size_t *error_index_offset_tree 91 ); 92 void lexbor_libxml2_bridge_copy_observations( 93 lxb_html_tree_t *tree, 94 lexbor_libxml2_bridge_extracted_observations *observations 95 ); 96 97 #endif 98