xref: /PHP-5.5/ext/mbstring/mb_gpc.c (revision 73c1be26)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2015 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Rui Hirokawa <hirokawa@php.net>                              |
16    |         Moriyoshi Koizumi <moriyoshi@php.net>                        |
17    +----------------------------------------------------------------------+
18  */
19 
20 /* $Id$ */
21 
22 /* {{{ includes */
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 
27 #include "php.h"
28 #include "php_ini.h"
29 #include "php_variables.h"
30 #include "libmbfl/mbfl/mbfilter_pass.h"
31 #include "mbstring.h"
32 #include "ext/standard/php_string.h"
33 #include "ext/standard/php_mail.h"
34 #include "ext/standard/url.h"
35 #include "main/php_output.h"
36 #include "ext/standard/info.h"
37 
38 #include "php_variables.h"
39 #include "php_globals.h"
40 #include "rfc1867.h"
41 #include "php_content_types.h"
42 #include "SAPI.h"
43 #include "TSRM.h"
44 
45 #include "mb_gpc.h"
46 /* }}} */
47 
48 #if HAVE_MBSTRING
49 
ZEND_EXTERN_MODULE_GLOBALS(mbstring)50 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
51 
52 /* {{{ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
53  * http input processing */
54 MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data)
55 {
56 	char *res = NULL, *separator=NULL;
57 	const char *c_var;
58 	zval *array_ptr;
59 	int free_buffer=0;
60 	const mbfl_encoding *detected;
61 	php_mb_encoding_handler_info_t info;
62 
63 	if (arg != PARSE_STRING) {
64 		char *value = MBSTRG(internal_encoding_name);
65 		_php_mb_ini_mbstring_internal_encoding_set(value, value ? strlen(value): 0 TSRMLS_CC);
66 	}
67 
68 	if (!MBSTRG(encoding_translation)) {
69 		php_default_treat_data(arg, str, destArray TSRMLS_CC);
70 		return;
71 	}
72 
73 	switch (arg) {
74 		case PARSE_POST:
75 		case PARSE_GET:
76 		case PARSE_COOKIE:
77 			ALLOC_ZVAL(array_ptr);
78 			array_init(array_ptr);
79 			INIT_PZVAL(array_ptr);
80 			switch (arg) {
81 				case PARSE_POST:
82 					PG(http_globals)[TRACK_VARS_POST] = array_ptr;
83 					break;
84 				case PARSE_GET:
85 					PG(http_globals)[TRACK_VARS_GET] = array_ptr;
86 					break;
87 				case PARSE_COOKIE:
88 					PG(http_globals)[TRACK_VARS_COOKIE] = array_ptr;
89 					break;
90 			}
91 			break;
92 		default:
93 			array_ptr=destArray;
94 			break;
95 	}
96 
97 	if (arg==PARSE_POST) {
98 		sapi_handle_post(array_ptr TSRMLS_CC);
99 		return;
100 	}
101 
102 	if (arg == PARSE_GET) {		/* GET data */
103 		c_var = SG(request_info).query_string;
104 		if (c_var && *c_var) {
105 			res = (char *) estrdup(c_var);
106 			free_buffer = 1;
107 		} else {
108 			free_buffer = 0;
109 		}
110 	} else if (arg == PARSE_COOKIE) {		/* Cookie data */
111 		c_var = SG(request_info).cookie_data;
112 		if (c_var && *c_var) {
113 			res = (char *) estrdup(c_var);
114 			free_buffer = 1;
115 		} else {
116 			free_buffer = 0;
117 		}
118 	} else if (arg == PARSE_STRING) {		/* String data */
119 		res = str;
120 		free_buffer = 1;
121 	}
122 
123 	if (!res) {
124 		return;
125 	}
126 
127 	switch (arg) {
128 	case PARSE_POST:
129 	case PARSE_GET:
130 	case PARSE_STRING:
131 		separator = (char *) estrdup(PG(arg_separator).input);
132 		break;
133 	case PARSE_COOKIE:
134 		separator = ";\0";
135 		break;
136 	}
137 
138 	switch(arg) {
139 	case PARSE_POST:
140 		MBSTRG(http_input_identify_post) = NULL;
141 		break;
142 	case PARSE_GET:
143 		MBSTRG(http_input_identify_get) = NULL;
144 		break;
145 	case PARSE_COOKIE:
146 		MBSTRG(http_input_identify_cookie) = NULL;
147 		break;
148 	case PARSE_STRING:
149 		MBSTRG(http_input_identify_string) = NULL;
150 		break;
151 	}
152 
153 	info.data_type              = arg;
154 	info.separator              = separator;
155 	info.report_errors          = 0;
156 	info.to_encoding            = MBSTRG(internal_encoding);
157 	info.to_language            = MBSTRG(language);
158 	info.from_encodings         = MBSTRG(http_input_list);
159 	info.num_from_encodings     = MBSTRG(http_input_list_size);
160 	info.from_language          = MBSTRG(language);
161 
162 	MBSTRG(illegalchars) = 0;
163 
164 	detected = _php_mb_encoding_handler_ex(&info, array_ptr, res TSRMLS_CC);
165 	MBSTRG(http_input_identify) = detected;
166 
167 	if (detected) {
168 		switch(arg){
169 		case PARSE_POST:
170 			MBSTRG(http_input_identify_post) = detected;
171 			break;
172 		case PARSE_GET:
173 			MBSTRG(http_input_identify_get) = detected;
174 			break;
175 		case PARSE_COOKIE:
176 			MBSTRG(http_input_identify_cookie) = detected;
177 			break;
178 		case PARSE_STRING:
179 			MBSTRG(http_input_identify_string) = detected;
180 			break;
181 		}
182 	}
183 
184 	if (arg != PARSE_COOKIE) {
185 		efree(separator);
186 	}
187 
188 	if (free_buffer) {
189 		efree(res);
190 	}
191 }
192 /* }}} */
193 
194 /* {{{ mbfl_no_encoding _php_mb_encoding_handler_ex() */
_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t * info,zval * arg,char * res TSRMLS_DC)195 const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_info_t *info, zval *arg, char *res TSRMLS_DC)
196 {
197 	char *var, *val;
198 	const char *s1, *s2;
199 	char *strtok_buf = NULL, **val_list = NULL;
200 	zval *array_ptr = (zval *) arg;
201 	int n, num, *len_list = NULL;
202 	unsigned int val_len, new_val_len;
203 	mbfl_string string, resvar, resval;
204 	const mbfl_encoding *from_encoding = NULL;
205 	mbfl_encoding_detector *identd = NULL;
206 	mbfl_buffer_converter *convd = NULL;
207 
208 	mbfl_string_init_set(&string, info->to_language, info->to_encoding->no_encoding);
209 	mbfl_string_init_set(&resvar, info->to_language, info->to_encoding->no_encoding);
210 	mbfl_string_init_set(&resval, info->to_language, info->to_encoding->no_encoding);
211 
212 	if (!res || *res == '\0') {
213 		goto out;
214 	}
215 
216 	/* count the variables(separators) contained in the "res".
217 	 * separator may contain multiple separator chars.
218 	 */
219 	num = 1;
220 	for (s1=res; *s1 != '\0'; s1++) {
221 		for (s2=info->separator; *s2 != '\0'; s2++) {
222 			if (*s1 == *s2) {
223 				num++;
224 			}
225 		}
226 	}
227 	num *= 2; /* need space for variable name and value */
228 
229 	val_list = (char **)ecalloc(num, sizeof(char *));
230 	len_list = (int *)ecalloc(num, sizeof(int));
231 
232 	/* split and decode the query */
233 	n = 0;
234 	strtok_buf = NULL;
235 	var = php_strtok_r(res, info->separator, &strtok_buf);
236 	while (var)  {
237 		val = strchr(var, '=');
238 		if (val) { /* have a value */
239 			len_list[n] = php_url_decode(var, val-var);
240 			val_list[n] = var;
241 			n++;
242 
243 			*val++ = '\0';
244 			val_list[n] = val;
245 			len_list[n] = php_url_decode(val, strlen(val));
246 		} else {
247 			len_list[n] = php_url_decode(var, strlen(var));
248 			val_list[n] = var;
249 			n++;
250 
251 			val_list[n] = "";
252 			len_list[n] = 0;
253 		}
254 		n++;
255 		var = php_strtok_r(NULL, info->separator, &strtok_buf);
256 	}
257 
258 	if (n > (PG(max_input_vars) * 2)) {
259 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Input variables exceeded %ld. To increase the limit change max_input_vars in php.ini.", PG(max_input_vars));
260 		goto out;
261 	}
262 
263 	num = n; /* make sure to process initialized vars only */
264 
265 	/* initialize converter */
266 	if (info->num_from_encodings <= 0) {
267 		from_encoding = &mbfl_encoding_pass;
268 	} else if (info->num_from_encodings == 1) {
269 		from_encoding = info->from_encodings[0];
270 	} else {
271 		/* auto detect */
272 		from_encoding = NULL;
273 		identd = mbfl_encoding_detector_new2(info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection));
274 		if (identd != NULL) {
275 			n = 0;
276 			while (n < num) {
277 				string.val = (unsigned char *)val_list[n];
278 				string.len = len_list[n];
279 				if (mbfl_encoding_detector_feed(identd, &string)) {
280 					break;
281 				}
282 				n++;
283 			}
284 			from_encoding = mbfl_encoding_detector_judge2(identd);
285 			mbfl_encoding_detector_delete(identd);
286 		}
287 		if (!from_encoding) {
288 			if (info->report_errors) {
289 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
290 			}
291 			from_encoding = &mbfl_encoding_pass;
292 		}
293 	}
294 
295 	convd = NULL;
296 	if (from_encoding != &mbfl_encoding_pass) {
297 		convd = mbfl_buffer_converter_new2(from_encoding, info->to_encoding, 0);
298 		if (convd != NULL) {
299 			mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
300 			mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
301 		} else {
302 			if (info->report_errors) {
303 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
304 			}
305 			goto out;
306 		}
307 	}
308 
309 	/* convert encoding */
310 	string.no_encoding = from_encoding->no_encoding;
311 
312 	n = 0;
313 	while (n < num) {
314 		string.val = (unsigned char *)val_list[n];
315 		string.len = len_list[n];
316 		if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resvar) != NULL) {
317 			var = (char *)resvar.val;
318 		} else {
319 			var = val_list[n];
320 		}
321 		n++;
322 		string.val = (unsigned char *)val_list[n];
323 		string.len = len_list[n];
324 		if (convd != NULL && mbfl_buffer_converter_feed_result(convd, &string, &resval) != NULL) {
325 			val = (char *)resval.val;
326 			val_len = resval.len;
327 		} else {
328 			val = val_list[n];
329 			val_len = len_list[n];
330 		}
331 		n++;
332 		/* we need val to be emalloc()ed */
333 		val = estrndup(val, val_len);
334 		if (sapi_module.input_filter(info->data_type, var, &val, val_len, &new_val_len TSRMLS_CC)) {
335 			/* add variable to symbol table */
336 			php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC);
337 		}
338 		efree(val);
339 
340 		if (convd != NULL){
341 			mbfl_string_clear(&resvar);
342 			mbfl_string_clear(&resval);
343 		}
344 	}
345 
346 out:
347 	if (convd != NULL) {
348 		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
349 		mbfl_buffer_converter_delete(convd);
350 	}
351 	if (val_list != NULL) {
352 		efree((void *)val_list);
353 	}
354 	if (len_list != NULL) {
355 		efree((void *)len_list);
356 	}
357 
358 	return from_encoding;
359 }
360 /* }}} */
361 
362 /* {{{ SAPI_POST_HANDLER_FUNC(php_mb_post_handler) */
SAPI_POST_HANDLER_FUNC(php_mb_post_handler)363 SAPI_POST_HANDLER_FUNC(php_mb_post_handler)
364 {
365 	const mbfl_encoding *detected;
366 	php_mb_encoding_handler_info_t info;
367 
368 	MBSTRG(http_input_identify_post) = NULL;
369 
370 	info.data_type              = PARSE_POST;
371 	info.separator              = "&";
372 	info.report_errors          = 0;
373 	info.to_encoding            = MBSTRG(internal_encoding);
374 	info.to_language            = MBSTRG(language);
375 	info.from_encodings         = MBSTRG(http_input_list);
376 	info.num_from_encodings     = MBSTRG(http_input_list_size);
377 	info.from_language          = MBSTRG(language);
378 
379 	detected = _php_mb_encoding_handler_ex(&info, arg, SG(request_info).post_data TSRMLS_CC);
380 
381 	MBSTRG(http_input_identify) = detected;
382 	if (detected) {
383 		MBSTRG(http_input_identify_post) = detected;
384 	}
385 }
386 /* }}} */
387 
388 #endif /* HAVE_MBSTRING */
389 
390 /*
391  * Local variables:
392  * tab-width: 4
393  * c-basic-offset: 4
394  * End:
395  * vim600: fdm=marker
396  * vim: noet sw=4 ts=4
397  */
398 
399