1 /* 2 +----------------------------------------------------------------------+ 3 | Copyright (c) The PHP Group | 4 +----------------------------------------------------------------------+ 5 | This source file is subject to version 3.01 of the PHP license, | 6 | that is bundled with this package in the file LICENSE, and is | 7 | available through the world-wide-web at the following url: | 8 | https://www.php.net/license/3_01.txt | 9 | If you did not receive a copy of the PHP license and are unable to | 10 | obtain it through the world-wide-web, please send a note to | 11 | license@php.net so we can mail you a copy immediately. | 12 +----------------------------------------------------------------------+ 13 | Authors: Niels Dossche <nielsdos@php.net> | 14 +----------------------------------------------------------------------+ 15 */ 16 17 #ifndef HTML5_PARSER_H 18 #define HTML5_PARSER_H 19 20 #include "namespace_compat.h" 21 #include <lexbor/html/parser.h> 22 #include <libxml/tree.h> 23 #include <Zend/zend_portability.h> 24 25 typedef enum { 26 LEXBOR_LIBXML2_BRIDGE_STATUS_OK = 0, 27 LEXBOR_LIBXML2_BRIDGE_STATUS_CANNOT_INIT, 28 LEXBOR_LIBXML2_BRIDGE_STATUS_FATAL_PARSE, 29 LEXBOR_LIBXML2_BRIDGE_STATUS_OVERFLOW, 30 LEXBOR_LIBXML2_BRIDGE_STATUS_OOM, 31 } lexbor_libxml2_bridge_status; 32 33 typedef void (*lexbor_libxml2_bridge_tokenizer_error_reporter)( 34 void *application_data, 35 lxb_html_tokenizer_error_t *error, 36 size_t offset 37 ); 38 typedef void (*lexbor_libxml2_bridge_tree_error_reporter)( 39 void *application_data, 40 lxb_html_tree_error_t *error, 41 size_t line, 42 size_t column, 43 size_t len 44 ); 45 46 typedef struct _lexbor_libxml2_bridge_extracted_observations { 47 bool has_explicit_html_tag; 48 bool has_explicit_head_tag; 49 bool has_explicit_body_tag; 50 } lexbor_libxml2_bridge_extracted_observations; 51 52 typedef struct _lexbor_libxml2_bridge_parse_context { 53 /* Private fields */ 54 lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter; 55 lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter; 56 /* Public fields */ 57 lexbor_libxml2_bridge_extracted_observations observations; 58 /* Application data, do what you want with this */ 59 void *application_data; 60 } lexbor_libxml2_bridge_parse_context; 61 62 void lexbor_libxml2_bridge_parse_context_init(lexbor_libxml2_bridge_parse_context *ctx); 63 void lexbor_libxml2_bridge_parse_set_error_callbacks( 64 lexbor_libxml2_bridge_parse_context *ctx, 65 lexbor_libxml2_bridge_tokenizer_error_reporter tokenizer_error_reporter, 66 lexbor_libxml2_bridge_tree_error_reporter tree_error_reporter 67 ); 68 lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document( 69 lxb_html_document_t *document, 70 xmlDocPtr *doc_out, 71 bool compact_text_nodes, 72 bool create_default_ns, 73 php_dom_libxml_ns_mapper *ns_mapper 74 ); 75 void lexbor_libxml2_bridge_report_errors( 76 const lexbor_libxml2_bridge_parse_context *ctx, 77 lxb_html_parser_t *parser, 78 const lxb_char_t *input_html, 79 size_t chunk_offset, 80 size_t *error_index_offset_tokenizer, 81 size_t *error_index_offset_tree 82 ); 83 void lexbor_libxml2_bridge_copy_observations( 84 lxb_html_tree_t *tree, 85 lexbor_libxml2_bridge_extracted_observations *observations 86 ); 87 88 #endif 89