xref: /php-src/ext/mbstring/mb_gpc.c (revision fd2d8696)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Rui Hirokawa <hirokawa@php.net>                              |
14    |         Moriyoshi Koizumi <moriyoshi@php.net>                        |
15    +----------------------------------------------------------------------+
16  */
17 
18 /* {{{ includes */
19 #include "php.h"
20 #include "php_variables.h"
21 #include "libmbfl/mbfl/mbfilter_pass.h"
22 #include "mbstring.h"
23 #include "ext/standard/url.h"
24 
25 #include "php_globals.h"
26 #include "TSRM.h"
27 
28 #include "mb_gpc.h"
29 /* }}} */
30 
ZEND_EXTERN_MODULE_GLOBALS(mbstring)31 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
32 
33 /* {{{ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
34  * http input processing */
35 MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
36 {
37 	char *res = NULL, *separator=NULL;
38 	const char *c_var;
39 	zval v_array;
40 	int free_buffer=0;
41 	const mbfl_encoding *detected;
42 	php_mb_encoding_handler_info_t info;
43 
44 	if (!MBSTRG(encoding_translation)) {
45 		php_default_treat_data(arg, str, destArray);
46 		return;
47 	}
48 
49 	switch (arg) {
50 		case PARSE_POST:
51 		case PARSE_GET:
52 		case PARSE_COOKIE:
53 			array_init(&v_array);
54 			switch (arg) {
55 				case PARSE_POST:
56 					ZVAL_COPY_VALUE(&PG(http_globals)[TRACK_VARS_POST], &v_array);
57 					break;
58 				case PARSE_GET:
59 					ZVAL_COPY_VALUE(&PG(http_globals)[TRACK_VARS_GET], &v_array);
60 					break;
61 				case PARSE_COOKIE:
62 					ZVAL_COPY_VALUE(&PG(http_globals)[TRACK_VARS_COOKIE], &v_array);
63 					break;
64 			}
65 			break;
66 		default:
67 			ZVAL_COPY_VALUE(&v_array, destArray);
68 			break;
69 	}
70 
71 	switch (arg) {
72 		case PARSE_POST:
73 			sapi_handle_post(&v_array);
74 			return;
75 		case PARSE_GET: /* GET data */
76 			c_var = SG(request_info).query_string;
77 			if (c_var && *c_var) {
78 				res = (char *) estrdup(c_var);
79 				free_buffer = 1;
80 			}
81 			break;
82 		case PARSE_COOKIE: /* Cookie data */
83 			c_var = SG(request_info).cookie_data;
84 			if (c_var && *c_var) {
85 				res = (char *) estrdup(c_var);
86 				free_buffer = 1;
87 			}
88 			break;
89 		case PARSE_STRING: /* String data */
90 			res = str;
91 			free_buffer = 1;
92 			break;
93 	}
94 
95 	if (!res) {
96 		return;
97 	}
98 
99 	switch (arg) {
100 		case PARSE_POST:
101 		case PARSE_GET:
102 		case PARSE_STRING:
103 			separator = (char *) estrdup(PG(arg_separator).input);
104 			break;
105 		case PARSE_COOKIE:
106 			separator = ";\0";
107 			break;
108 	}
109 
110 	switch (arg) {
111 		case PARSE_POST:
112 			MBSTRG(http_input_identify_post) = NULL;
113 			break;
114 		case PARSE_GET:
115 			MBSTRG(http_input_identify_get) = NULL;
116 			break;
117 		case PARSE_COOKIE:
118 			MBSTRG(http_input_identify_cookie) = NULL;
119 			break;
120 		case PARSE_STRING:
121 			MBSTRG(http_input_identify_string) = NULL;
122 			break;
123 	}
124 
125 	info.data_type              = arg;
126 	info.separator              = separator;
127 	info.report_errors          = false;
128 	info.to_encoding            = MBSTRG(internal_encoding);
129 	info.from_encodings         = MBSTRG(http_input_list);
130 	info.num_from_encodings     = MBSTRG(http_input_list_size);
131 
132 	MBSTRG(illegalchars) = 0;
133 
134 	detected = _php_mb_encoding_handler_ex(&info, &v_array, res);
135 	MBSTRG(http_input_identify) = detected;
136 
137 	if (detected) {
138 		switch(arg){
139 		case PARSE_POST:
140 			MBSTRG(http_input_identify_post) = detected;
141 			break;
142 		case PARSE_GET:
143 			MBSTRG(http_input_identify_get) = detected;
144 			break;
145 		case PARSE_COOKIE:
146 			MBSTRG(http_input_identify_cookie) = detected;
147 			break;
148 		case PARSE_STRING:
149 			MBSTRG(http_input_identify_string) = detected;
150 			break;
151 		}
152 	}
153 
154 	if (arg != PARSE_COOKIE) {
155 		efree(separator);
156 	}
157 
158 	if (free_buffer) {
159 		efree(res);
160 	}
161 }
162 /* }}} */
163 
164 /* {{{ mbfl_no_encoding _php_mb_encoding_handler_ex() */
_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t * info,zval * array_ptr,char * res)165 const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *array_ptr, char *res)
166 {
167 	char *var, *val;
168 	char *strtok_buf = NULL, **val_list = NULL;
169 	size_t n, num = 1, *len_list = NULL;
170 	size_t new_val_len;
171 	const mbfl_encoding *from_encoding = NULL;
172 
173 	if (!res || *res == '\0') {
174 		goto out;
175 	}
176 
177 	/* count variables contained in `res`.
178 	 * separator may contain multiple separator chars; ANY of them demarcate variables */
179 	for (char *s1 = res; *s1; s1++) {
180 		for (const char *s2 = info->separator; *s2; s2++) {
181 			if (*s1 == *s2) {
182 				num++;
183 			}
184 		}
185 	}
186 	num *= 2; /* need space for variable name and value */
187 
188 	val_list = (char **)ecalloc(num, sizeof(char *));
189 	len_list = (size_t *)ecalloc(num, sizeof(size_t));
190 
191 	/* split and decode the query */
192 	n = 0;
193 	var = php_strtok_r(res, info->separator, &strtok_buf);
194 	while (var)  {
195 		val = strchr(var, '=');
196 		if (val) { /* have a value */
197 			len_list[n] = php_url_decode(var, val-var);
198 			val_list[n] = var;
199 			n++;
200 
201 			*val++ = '\0';
202 			val_list[n] = val;
203 			len_list[n] = php_url_decode(val, strlen(val));
204 		} else {
205 			len_list[n] = php_url_decode(var, strlen(var));
206 			val_list[n] = var;
207 			n++;
208 
209 			val_list[n] = "";
210 			len_list[n] = 0;
211 		}
212 		n++;
213 		var = php_strtok_r(NULL, info->separator, &strtok_buf);
214 	}
215 
216 	zend_long max_input_vars = REQUEST_PARSE_BODY_OPTION_GET(max_input_vars, PG(max_input_vars));
217 	if (ZEND_SIZE_T_GT_ZEND_LONG(n, max_input_vars * 2)) {
218 		php_error_docref(NULL, E_WARNING, "Input variables exceeded " ZEND_LONG_FMT ". To increase the limit change max_input_vars in php.ini.", max_input_vars);
219 		goto out;
220 	}
221 
222 	num = n; /* make sure to process initialized vars only */
223 
224 	/* initialize converter */
225 	if (info->num_from_encodings == 0) {
226 		from_encoding = &mbfl_encoding_pass;
227 	} else if (info->num_from_encodings == 1) {
228 		from_encoding = info->from_encodings[0];
229 	} else {
230 		from_encoding = mb_guess_encoding_for_strings((const unsigned char**)val_list, len_list, num, info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection), false);
231 		if (!from_encoding) {
232 			if (info->report_errors) {
233 				php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
234 			}
235 			from_encoding = &mbfl_encoding_pass;
236 		}
237 	}
238 
239 	/* convert encoding */
240 	n = 0;
241 	while (n < num) {
242 		if (from_encoding != &mbfl_encoding_pass && info->to_encoding != &mbfl_encoding_pass) {
243 			unsigned int num_errors = 0;
244 			zend_string *converted_var = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
245 			MBSTRG(illegalchars) += num_errors;
246 			n++;
247 
248 			num_errors = 0;
249 			zend_string *converted_val = mb_fast_convert((unsigned char*)val_list[n], len_list[n], from_encoding, info->to_encoding, MBSTRG(current_filter_illegal_substchar), MBSTRG(current_filter_illegal_mode), &num_errors);
250 			MBSTRG(illegalchars) += num_errors;
251 			n++;
252 
253 			/* `val` must be a pointer returned by `emalloc` */
254 			val = estrndup(ZSTR_VAL(converted_val), ZSTR_LEN(converted_val));
255 			if (sapi_module.input_filter(info->data_type, ZSTR_VAL(converted_var), &val, ZSTR_LEN(converted_val), &new_val_len)) {
256 				/* add variable to symbol table */
257 				php_register_variable_safe(ZSTR_VAL(converted_var), val, new_val_len, array_ptr);
258 			}
259 			zend_string_free(converted_var);
260 			zend_string_free(converted_val);
261 		} else {
262 			var = val_list[n++];
263 			val = estrndup(val_list[n], len_list[n]);
264 			if (sapi_module.input_filter(info->data_type, var, &val, len_list[n], &new_val_len)) {
265 				php_register_variable_safe(var, val, new_val_len, array_ptr);
266 			}
267 			n++;
268 		}
269 		efree(val);
270 	}
271 
272 out:
273 	if (val_list != NULL) {
274 		efree((void *)val_list);
275 	}
276 	if (len_list != NULL) {
277 		efree((void *)len_list);
278 	}
279 
280 	return from_encoding;
281 }
282 /* }}} */
283 
284 /* {{{ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) */
SAPI_POST_HANDLER_FUNC(php_mb_post_handler)285 SAPI_POST_HANDLER_FUNC(php_mb_post_handler)
286 {
287 	const mbfl_encoding *detected;
288 	php_mb_encoding_handler_info_t info;
289 	zend_string *post_data_str = NULL;
290 
291 	MBSTRG(http_input_identify_post) = NULL;
292 
293 	info.data_type              = PARSE_POST;
294 	info.separator              = "&";
295 	info.report_errors          = false;
296 	info.to_encoding            = MBSTRG(internal_encoding);
297 	info.from_encodings         = MBSTRG(http_input_list);
298 	info.num_from_encodings     = MBSTRG(http_input_list_size);
299 
300 	php_stream_rewind(SG(request_info).request_body);
301 	post_data_str = php_stream_copy_to_mem(SG(request_info).request_body, PHP_STREAM_COPY_ALL, 0);
302 	detected = _php_mb_encoding_handler_ex(&info, arg, post_data_str ? ZSTR_VAL(post_data_str) : NULL);
303 	if (post_data_str) {
304 		zend_string_release_ex(post_data_str, 0);
305 	}
306 
307 	MBSTRG(http_input_identify) = detected;
308 	if (detected) {
309 		MBSTRG(http_input_identify_post) = detected;
310 	}
311 }
312 /* }}} */
313