xref: /PHP-8.0/Zend/zend_multibyte.c (revision fa8d9b11)
1 /*
2    +----------------------------------------------------------------------+
3    | Zend Engine                                                          |
4    +----------------------------------------------------------------------+
5    | Copyright (c) Zend Technologies Ltd. (http://www.zend.com)           |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 2.00 of the Zend license,     |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at                              |
10    | http://www.zend.com/license/2_00.txt.                                |
11    | If you did not receive a copy of the Zend license and are unable to  |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@zend.com so we can mail you a copy immediately.              |
14    +----------------------------------------------------------------------+
15    | Authors: Masaki Fujimoto <fujimoto@php.net>                          |
16    |          Rui Hirokawa <hirokawa@php.net>                             |
17    +----------------------------------------------------------------------+
18 */
19 
20 #include "zend.h"
21 #include "zend_compile.h"
22 #include "zend_operators.h"
23 #include "zend_multibyte.h"
24 #include "zend_ini.h"
25 
dummy_encoding_fetcher(const char * encoding_name)26 static const zend_encoding *dummy_encoding_fetcher(const char *encoding_name)
27 {
28 	return NULL;
29 }
30 
/* Default-table name getter: hands the encoding pointer back reinterpreted as
 * a C string. This matches the built-in zend_multibyte_encoding_* defaults in
 * this file, which are string literals cast to zend_encoding pointers. */
static const char *dummy_encoding_name_getter(const zend_encoding *encoding)
{
	const char *name = (const char*)encoding;

	return name;
}
35 
/* Default-table lexer compatibility check: reports 0 for every encoding. */
static bool dummy_encoding_lexer_compatibility_checker(const zend_encoding *encoding)
{
	(void)encoding;

	return 0;
}
40 
/* Default-table detector: inspects nothing and never identifies an encoding. */
static const zend_encoding *dummy_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size)
{
	(void)string;
	(void)length;
	(void)list;
	(void)list_size;

	return NULL;
}
45 
/* Default-table converter: performs no conversion, leaves *to and *to_length
 * untouched and always returns (size_t)-1 (presumably the failure sentinel
 * expected by callers -- confirm against the converter contract). */
static size_t dummy_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
{
	const size_t not_converted = (size_t)-1;

	(void)to;
	(void)to_length;
	(void)from;
	(void)from_length;
	(void)encoding_to;
	(void)encoding_from;

	return not_converted;
}
50 
dummy_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,bool persistent)51 static zend_result dummy_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
52 {
53 	*return_list = pemalloc(0, persistent);
54 	*return_size = 0;
55 	return SUCCESS;
56 }
57 
dummy_internal_encoding_getter(void)58 static const zend_encoding *dummy_internal_encoding_getter(void)
59 {
60 	return NULL;
61 }
62 
dummy_internal_encoding_setter(const zend_encoding * encoding)63 static zend_result dummy_internal_encoding_setter(const zend_encoding *encoding)
64 {
65 	return FAILURE;
66 }
67 
68 static zend_multibyte_functions multibyte_functions_dummy;
69 static zend_multibyte_functions multibyte_functions = {
70 	NULL,
71 	dummy_encoding_fetcher,
72 	dummy_encoding_name_getter,
73 	dummy_encoding_lexer_compatibility_checker,
74 	dummy_encoding_detector,
75 	dummy_encoding_converter,
76 	dummy_encoding_list_parser,
77 	dummy_internal_encoding_getter,
78 	dummy_internal_encoding_setter
79 };
80 
81 ZEND_API const zend_encoding *zend_multibyte_encoding_utf32be = (const zend_encoding*)"UTF-32BE";
82 ZEND_API const zend_encoding *zend_multibyte_encoding_utf32le = (const zend_encoding*)"UTF-32LE";
83 ZEND_API const zend_encoding *zend_multibyte_encoding_utf16be = (const zend_encoding*)"UTF-16BE";
84 ZEND_API const zend_encoding *zend_multibyte_encoding_utf16le = (const zend_encoding*)"UTF-32LE";
85 ZEND_API const zend_encoding *zend_multibyte_encoding_utf8 = (const zend_encoding*)"UTF-8";
86 
zend_multibyte_set_functions(const zend_multibyte_functions * functions)87 ZEND_API zend_result zend_multibyte_set_functions(const zend_multibyte_functions *functions)
88 {
89 	zend_multibyte_encoding_utf32be = functions->encoding_fetcher("UTF-32BE");
90 	if (!zend_multibyte_encoding_utf32be) {
91 		return FAILURE;
92 	}
93 	zend_multibyte_encoding_utf32le = functions->encoding_fetcher("UTF-32LE");
94 	if (!zend_multibyte_encoding_utf32le) {
95 		return FAILURE;
96 	}
97 	zend_multibyte_encoding_utf16be = functions->encoding_fetcher("UTF-16BE");
98 	if (!zend_multibyte_encoding_utf16be) {
99 		return FAILURE;
100 	}
101 	zend_multibyte_encoding_utf16le = functions->encoding_fetcher("UTF-16LE");
102 	if (!zend_multibyte_encoding_utf16le) {
103 		return FAILURE;
104 	}
105 	zend_multibyte_encoding_utf8 = functions->encoding_fetcher("UTF-8");
106 	if (!zend_multibyte_encoding_utf8) {
107 		return FAILURE;
108 	}
109 
110 	multibyte_functions_dummy = multibyte_functions;
111 	multibyte_functions = *functions;
112 
113 	/* As zend_multibyte_set_functions() gets called after ini settings were
114 	 * populated, we need to reinitialize script_encoding here.
115 	 */
116 	{
117 		const char *value = zend_ini_string("zend.script_encoding", sizeof("zend.script_encoding") - 1, 0);
118 		zend_multibyte_set_script_encoding_by_string(value, strlen(value));
119 	}
120 	return SUCCESS;
121 }
122 
zend_multibyte_restore_functions(void)123 ZEND_API void zend_multibyte_restore_functions(void)
124 {
125 	multibyte_functions = multibyte_functions_dummy;
126 }
127 
zend_multibyte_get_functions(void)128 ZEND_API const zend_multibyte_functions *zend_multibyte_get_functions(void)
129 {
130 	return multibyte_functions.provider_name ? &multibyte_functions: NULL;
131 }
132 
zend_multibyte_fetch_encoding(const char * name)133 ZEND_API const zend_encoding *zend_multibyte_fetch_encoding(const char *name)
134 {
135 	return multibyte_functions.encoding_fetcher(name);
136 }
137 
zend_multibyte_get_encoding_name(const zend_encoding * encoding)138 ZEND_API const char *zend_multibyte_get_encoding_name(const zend_encoding *encoding)
139 {
140 	return multibyte_functions.encoding_name_getter(encoding);
141 }
142 
zend_multibyte_check_lexer_compatibility(const zend_encoding * encoding)143 ZEND_API int zend_multibyte_check_lexer_compatibility(const zend_encoding *encoding)
144 {
145 	return multibyte_functions.lexer_compatibility_checker(encoding);
146 }
147 
zend_multibyte_encoding_detector(const unsigned char * string,size_t length,const zend_encoding ** list,size_t list_size)148 ZEND_API const zend_encoding *zend_multibyte_encoding_detector(const unsigned char *string, size_t length, const zend_encoding **list, size_t list_size)
149 {
150 	return multibyte_functions.encoding_detector(string, length, list, list_size);
151 }
152 
zend_multibyte_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)153 ZEND_API size_t zend_multibyte_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
154 {
155 	return multibyte_functions.encoding_converter(to, to_length, from, from_length, encoding_to, encoding_from);
156 }
157 
zend_multibyte_parse_encoding_list(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,bool persistent)158 ZEND_API zend_result zend_multibyte_parse_encoding_list(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
159 {
160 	return multibyte_functions.encoding_list_parser(encoding_list, encoding_list_len, return_list, return_size, persistent);
161 }
162 
zend_multibyte_get_internal_encoding(void)163 ZEND_API const zend_encoding *zend_multibyte_get_internal_encoding(void)
164 {
165 	return multibyte_functions.internal_encoding_getter();
166 }
167 
zend_multibyte_get_script_encoding(void)168 ZEND_API const zend_encoding *zend_multibyte_get_script_encoding(void)
169 {
170 	return LANG_SCNG(script_encoding);
171 }
172 
zend_multibyte_set_script_encoding(const zend_encoding ** encoding_list,size_t encoding_list_size)173 ZEND_API int zend_multibyte_set_script_encoding(const zend_encoding **encoding_list, size_t encoding_list_size)
174 {
175 	if (CG(script_encoding_list)) {
176 		free((char*)CG(script_encoding_list));
177 	}
178 	CG(script_encoding_list) = encoding_list;
179 	CG(script_encoding_list_size) = encoding_list_size;
180 	return SUCCESS;
181 }
182 
zend_multibyte_set_internal_encoding(const zend_encoding * encoding)183 ZEND_API zend_result zend_multibyte_set_internal_encoding(const zend_encoding *encoding)
184 {
185 	return multibyte_functions.internal_encoding_setter(encoding);
186 }
187 
zend_multibyte_set_script_encoding_by_string(const char * new_value,size_t new_value_length)188 ZEND_API zend_result zend_multibyte_set_script_encoding_by_string(const char *new_value, size_t new_value_length)
189 {
190 	const zend_encoding **list = 0;
191 	size_t size = 0;
192 
193 	if (!new_value) {
194 		zend_multibyte_set_script_encoding(NULL, 0);
195 		return SUCCESS;
196 	}
197 
198 	if (FAILURE == zend_multibyte_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
199 		return FAILURE;
200 	}
201 
202 	if (size == 0) {
203 		pefree((void*)list, 1);
204 		return FAILURE;
205 	}
206 
207 	if (FAILURE == zend_multibyte_set_script_encoding(list, size)) {
208 		return FAILURE;
209 	}
210 
211 	return SUCCESS;
212 }
213