xref: /PHP-7.1/ext/intl/uchar/uchar.c (revision 03f3b847)
1 #include "uchar.h"
2 #include "intl_data.h"
3 #include "intl_convert.h"
4 
5 #include <unicode/uchar.h>
6 
/* Shorthand for declaring a method implementation on the IntlChar class. */
#define IC_METHOD(method_name) PHP_METHOD(IntlChar, method_name)
8 
/* Turns a user-supplied codepoint argument into a UChar32.
 *
 * zcp may be an integer (used as-is) or a string, which must decode
 * to exactly one UTF-8 sequence spanning the whole string.
 * On success the codepoint is written to *pcp and SUCCESS is returned.
 * On any failure U_ILLEGAL_ARGUMENT_ERROR plus a descriptive message is
 * recorded through intl_error_set_code()/intl_error_set_custom_msg(),
 * *pcp is left untouched and FAILURE is returned.
 */
static inline int convert_cp(UChar32* pcp, zval *zcp) {
	zend_long codepoint = -1;

	switch (Z_TYPE_P(zcp)) {
		case IS_LONG:
			codepoint = Z_LVAL_P(zcp);
			break;

		case IS_STRING: {
			size_t input_len = Z_STRLEN_P(zcp);
			int32_t offset = 0;

			/* U8_NEXT works with int32_t offsets, so reject oversized input first */
			if (ZEND_SIZE_T_INT_OVFL(input_len)) {
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
				return FAILURE;
			}

			/* Decode the first sequence; it must consume the entire string */
			U8_NEXT(Z_STRVAL_P(zcp), offset, input_len, codepoint);
			if (input_len != (size_t)offset) {
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
				return FAILURE;
			}
			break;
		}

		default:
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point.  Must be either integer or UTF-8 sequence.", 0);
			return FAILURE;
	}

	/* Applies to both input forms: the value must lie in the Unicode range */
	if ((codepoint < UCHAR_MIN_VALUE) || (codepoint > UCHAR_MAX_VALUE)) {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
		return FAILURE;
	}

	*pcp = (UChar32)codepoint;
	return SUCCESS;
}
43 
44 /* {{{ proto string IntlChar::chr(int|string $codepoint)
45  * Converts a numeric codepoint to UTF-8
46  * Acts as an identify function when given a valid UTF-8 encoded codepoint
47  */
48 ZEND_BEGIN_ARG_INFO_EX(chr_arginfo, 0, ZEND_RETURN_VALUE, 1)
49 	ZEND_ARG_INFO(0, codepoint)
50 ZEND_END_ARG_INFO();
IC_METHOD(chr)51 IC_METHOD(chr) {
52 	UChar32 cp;
53 	zval *zcp;
54 	char buffer[5];
55 	int buffer_len = 0;
56 
57 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
58 	    (convert_cp(&cp, zcp) == FAILURE)) {
59 		return;
60 	}
61 
62 	/* We can use unsafe because we know the codepoint is in valid range
63 	 * and that 4 bytes is enough for any unicode point
64 	 */
65 	U8_APPEND_UNSAFE(buffer, buffer_len, cp);
66 	buffer[buffer_len] = 0;
67 	RETURN_STRINGL(buffer, buffer_len);
68 }
69 /* }}} */
70 
71 /* {{{ proto int IntlChar::ord(int|string $character)
72  * Converts a UTf-8 encoded codepoint to its integer U32 value
73  * Acts as an identity function when passed a valid integer codepoint
74  */
75 ZEND_BEGIN_ARG_INFO_EX(ord_arginfo, 0, ZEND_RETURN_VALUE, 1)
76 	ZEND_ARG_INFO(0, character)
77 ZEND_END_ARG_INFO();
IC_METHOD(ord)78 IC_METHOD(ord) {
79 	UChar32 cp;
80 	zval *zcp;
81 
82 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
83 	    (convert_cp(&cp, zcp) == FAILURE)) {
84 		return;
85 	}
86 
87 	RETURN_LONG(cp);
88 }
89 /* }}} */
90 
91 /* {{{ proto bool IntlChar::hasBinaryProperty(int|string $codepoint, int $property) */
92 ZEND_BEGIN_ARG_INFO_EX(hasBinaryProperty_arginfo, 0, ZEND_RETURN_VALUE, 2)
93 	ZEND_ARG_INFO(0, codepoint)
94 	ZEND_ARG_INFO(0, property)
95 ZEND_END_ARG_INFO();
IC_METHOD(hasBinaryProperty)96 IC_METHOD(hasBinaryProperty) {
97 	UChar32 cp;
98 	zend_long prop;
99 	zval *zcp;
100 
101 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
102 	    (convert_cp(&cp, zcp) == FAILURE)) {
103 		return;
104 	}
105 
106 	RETURN_BOOL(u_hasBinaryProperty(cp, (UProperty)prop));
107 }
108 /* }}} */
109 
110 /* {{{ proto int IntlChar::getIntPropertyValue(int|string $codepoint, int $property) */
111 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyValue_arginfo, 0, ZEND_RETURN_VALUE, 2)
112 	ZEND_ARG_INFO(0, codepoint)
113 	ZEND_ARG_INFO(0, property)
114 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyValue)115 IC_METHOD(getIntPropertyValue) {
116 	UChar32 cp;
117 	zend_long prop;
118 	zval *zcp;
119 
120 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
121 	    (convert_cp(&cp, zcp) == FAILURE)) {
122 		return;
123 	}
124 
125 	RETURN_LONG(u_getIntPropertyValue(cp, (UProperty)prop));
126 }
127 /* }}} */
128 
129 /* {{{ proto int IntlChar::getIntPropertyMinValue(int $property) */
130 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMinValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
131 	ZEND_ARG_INFO(0, property)
132 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyMinValue)133 IC_METHOD(getIntPropertyMinValue) {
134 	zend_long prop;
135 
136 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
137 		return;
138 	}
139 
140 	RETURN_LONG(u_getIntPropertyMinValue((UProperty)prop));
141 }
142 /* }}} */
143 
144 /* {{{ proto int IntlChar::getIntPropertyMaxValue(int $property) */
145 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMaxValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
146 	ZEND_ARG_INFO(0, property)
147 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyMaxValue)148 IC_METHOD(getIntPropertyMaxValue) {
149 	zend_long prop;
150 
151 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
152 		return;
153 	}
154 
155 	RETURN_LONG(u_getIntPropertyMaxValue((UProperty)prop));
156 }
157 /* }}} */
158 
159 /* {{{ proto float IntlChar::getNumericValue(int|string $codepoint) */
160 ZEND_BEGIN_ARG_INFO_EX(getNumericValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
161 	ZEND_ARG_INFO(0, codepoint)
162 ZEND_END_ARG_INFO();
IC_METHOD(getNumericValue)163 IC_METHOD(getNumericValue) {
164 	UChar32 cp;
165 	zval *zcp;
166 
167 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
168 	    (convert_cp(&cp, zcp) == FAILURE)) {
169 		return;
170 	}
171 
172 	RETURN_DOUBLE(u_getNumericValue(cp));
173 }
174 /* }}} */
175 
176 /* {{{ proto void IntlChar::enumCharTypes(callable $callback) */
177 ZEND_BEGIN_ARG_INFO_EX(enumCharTypes_arginfo, 0, ZEND_RETURN_VALUE, 0)
178 	ZEND_ARG_INFO(0, callback)
179 ZEND_END_ARG_INFO();
180 typedef struct _enumCharType_data {
181 	zend_fcall_info fci;
182 	zend_fcall_info_cache fci_cache;
183 } enumCharType_data;
enumCharType_callback(enumCharType_data * context,UChar32 start,UChar32 limit,UCharCategory type)184 static UBool enumCharType_callback(enumCharType_data *context,
185                                    UChar32 start, UChar32 limit,
186                                    UCharCategory type) {
187 	zval retval;
188 	zval args[3];
189 
190 	ZVAL_NULL(&retval);
191 	/* Note that $start is INclusive, while $limit is EXclusive
192 	 * Therefore (0, 32, 15) means CPs 0..31 are of type 15
193 	 */
194 	ZVAL_LONG(&args[0], start);
195 	ZVAL_LONG(&args[1], limit);
196 	ZVAL_LONG(&args[2], type);
197 
198 	context->fci.retval = &retval;
199 	context->fci.param_count = 3;
200 	context->fci.params = args;
201 
202 	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
203 		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
204 		intl_errors_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
205 		zval_dtor(&retval);
206 		return 0;
207 	}
208 	zval_dtor(&retval);
209 	return 1;
210 }
IC_METHOD(enumCharTypes)211 IC_METHOD(enumCharTypes) {
212 	enumCharType_data context;
213 
214 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "f", &context.fci, &context.fci_cache) == FAILURE) {
215 		return;
216 	}
217 	u_enumCharTypes((UCharEnumTypeRange*)enumCharType_callback, &context);
218 }
219 /* }}} */
220 
221 /* {{{ proto int IntlChar::getBlockCode(int|string $codepoint) */
222 ZEND_BEGIN_ARG_INFO_EX(getBlockCode_arginfo, 0, ZEND_RETURN_VALUE, 1)
223 	ZEND_ARG_INFO(0, codepoint)
ZEND_END_ARG_INFO()224 ZEND_END_ARG_INFO()
225 IC_METHOD(getBlockCode) {
226 	UChar32 cp;
227 	zval *zcp;
228 
229 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
230 	    (convert_cp(&cp, zcp) == FAILURE)) {
231 		return;
232 	}
233 
234 	RETURN_LONG(ublock_getCode(cp));
235 }
236 /* }}} */
237 
238 /* {{{ proto string IntlChar::charName(int|string $codepoint, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
239 ZEND_BEGIN_ARG_INFO_EX(charName_arginfo, 0, ZEND_RETURN_VALUE, 1)
240 	ZEND_ARG_INFO(0, codepoint)
241 	ZEND_ARG_INFO(0, nameChoice)
ZEND_END_ARG_INFO()242 ZEND_END_ARG_INFO()
243 IC_METHOD(charName) {
244 	UChar32 cp;
245 	zval *zcp;
246 	UErrorCode error = U_ZERO_ERROR;
247 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
248 	zend_string *buffer = NULL;
249 	int32_t buffer_len;
250 
251 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &nameChoice) == FAILURE) ||
252 	    (convert_cp(&cp, zcp) == FAILURE)) {
253 		RETURN_NULL();
254 	}
255 
256 	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, NULL, 0, &error);
257 	buffer = zend_string_alloc(buffer_len, 0);
258 	error = U_ZERO_ERROR;
259 	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, ZSTR_VAL(buffer), ZSTR_LEN(buffer) + 1, &error);
260 	if (U_FAILURE(error)) {
261 		zend_string_free(buffer);
262 		INTL_CHECK_STATUS_OR_NULL(error, "Failure getting character name");
263 	}
264 	RETURN_NEW_STR(buffer);
265 }
266 /* }}} */
267 
268 /* {{{ proto int IntlChar::charFromName(string $characterName, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
269 ZEND_BEGIN_ARG_INFO_EX(charFromName_arginfo, 0, ZEND_RETURN_VALUE, 1)
270 	ZEND_ARG_INFO(0, characterName)
271 	ZEND_ARG_INFO(0, nameChoice)
ZEND_END_ARG_INFO()272 ZEND_END_ARG_INFO()
273 IC_METHOD(charFromName) {
274 	char *name;
275 	size_t name_len;
276 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
277 	UChar32 ret;
278 	UErrorCode error = U_ZERO_ERROR;
279 
280 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &name, &name_len, &nameChoice) == FAILURE) {
281 		RETURN_NULL();
282 	}
283 
284 	ret = u_charFromName((UCharNameChoice)nameChoice, name, &error);
285 	INTL_CHECK_STATUS_OR_NULL(error, NULL);
286 	RETURN_LONG(ret);
287 }
288 /* }}} */
289 
290 /* {{{ void void IntlChar::enumCharNames(int|string $start, int|string $limit, callable $callback, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
291 ZEND_BEGIN_ARG_INFO_EX(enumCharNames_arginfo, 0, ZEND_RETURN_VALUE, 3)
292 	ZEND_ARG_INFO(0, start)
293 	ZEND_ARG_INFO(0, limit)
294 	ZEND_ARG_INFO(0, callback)
295 	ZEND_ARG_INFO(0, nameChoice)
296 ZEND_END_ARG_INFO();
297 typedef struct _enumCharNames_data {
298 	zend_fcall_info fci;
299 	zend_fcall_info_cache fci_cache;
300 } enumCharNames_data;
enumCharNames_callback(enumCharNames_data * context,UChar32 code,UCharNameChoice nameChoice,const char * name,int32_t length)301 static UBool enumCharNames_callback(enumCharNames_data *context,
302                                     UChar32 code, UCharNameChoice nameChoice,
303                                     const char *name, int32_t length) {
304 	zval retval;
305 	zval args[3];
306 
307 	ZVAL_NULL(&retval);
308 	ZVAL_LONG(&args[0], code);
309 	ZVAL_LONG(&args[1], nameChoice);
310 	ZVAL_STRINGL(&args[2], name, length);
311 
312 	context->fci.retval = &retval;
313 	context->fci.param_count = 3;
314 	context->fci.params = args;
315 
316 	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
317 		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
318 		intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
319 		zval_dtor(&retval);
320 		zval_dtor(&args[2]);
321 		return 0;
322 	}
323 	zval_dtor(&retval);
324 	zval_dtor(&args[2]);
325 	return 1;
326 }
IC_METHOD(enumCharNames)327 IC_METHOD(enumCharNames) {
328 	UChar32 start, limit;
329 	zval *zstart, *zlimit;
330 	enumCharNames_data context;
331 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
332 	UErrorCode error = U_ZERO_ERROR;
333 
334 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zzf|l", &zstart, &zlimit, &context.fci, &context.fci_cache, &nameChoice) == FAILURE) ||
335 	    (convert_cp(&start, zstart) == FAILURE) ||
336 	    (convert_cp(&limit, zlimit) == FAILURE)) {
337 		return;
338 	}
339 
340 	u_enumCharNames(start, limit, (UEnumCharNamesFn*)enumCharNames_callback, &context, nameChoice, &error);
341 	INTL_CHECK_STATUS(error, NULL);
342 }
343 /* }}} */
344 
345 /* {{{ proto string IntlChar::getPropertyName(int $property, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
346 ZEND_BEGIN_ARG_INFO_EX(getPropertyName_arginfo, 0, ZEND_RETURN_VALUE, 1)
347 	ZEND_ARG_INFO(0, property)
348 	ZEND_ARG_INFO(0, nameChoice)
349 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyName)350 IC_METHOD(getPropertyName) {
351 	zend_long property;
352 	zend_long nameChoice = U_LONG_PROPERTY_NAME;
353 	const char *ret;
354 
355 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &property, &nameChoice) == FAILURE) {
356 		return;
357 	}
358 
359 	ret = u_getPropertyName((UProperty)property, (UPropertyNameChoice)nameChoice);
360 	if (ret) {
361 		RETURN_STRING(ret);
362 	} else {
363 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
364 		intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
365 		RETURN_FALSE;
366 	}
367 }
368 /* }}} */
369 
370 /* {{{ proto int IntlChar::getPropertyEnum(string $alias) */
371 ZEND_BEGIN_ARG_INFO_EX(getPropertyEnum_arginfo, 0, ZEND_RETURN_VALUE, 1)
372 	ZEND_ARG_INFO(0, alias)
373 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyEnum)374 IC_METHOD(getPropertyEnum) {
375 	char *alias;
376 	size_t alias_len;
377 
378 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &alias, &alias_len) == FAILURE) {
379 		return;
380 	}
381 
382 	RETURN_LONG(u_getPropertyEnum(alias));
383 }
384 /* }}} */
385 
386 /* {{{ proto string IntlChar::getPropertyValueName(int $property, int $value[, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
387 ZEND_BEGIN_ARG_INFO_EX(getPropertyValueName_arginfo, 0, ZEND_RETURN_VALUE, 2)
388 	ZEND_ARG_INFO(0, property)
389 	ZEND_ARG_INFO(0, value)
390 	ZEND_ARG_INFO(0, nameChoice)
391 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyValueName)392 IC_METHOD(getPropertyValueName) {
393 	zend_long property, value, nameChoice = U_LONG_PROPERTY_NAME;
394 	const char *ret;
395 
396 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ll|l", &property, &value, &nameChoice) == FAILURE) {
397 		return;
398 	}
399 
400 	ret = u_getPropertyValueName((UProperty)property, value, (UPropertyNameChoice)nameChoice);
401 	if (ret) {
402 		RETURN_STRING(ret);
403 	} else {
404 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
405 		intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
406 		RETURN_FALSE;
407 	}
408 }
409 /* }}} */
410 
411 /* {{{ proto int IntlChar::getPropertyValueEnum(int $property, string $name) */
412 ZEND_BEGIN_ARG_INFO_EX(getPropertyValueEnum_arginfo, 0, ZEND_RETURN_VALUE, 2)
413 	ZEND_ARG_INFO(0, property)
414 	ZEND_ARG_INFO(0, name)
415 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyValueEnum)416 IC_METHOD(getPropertyValueEnum) {
417 	zend_long property;
418 	char *name;
419 	size_t name_len;
420 
421 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ls", &property, &name, &name_len) == FAILURE) {
422 		return;
423 	}
424 
425 	RETURN_LONG(u_getPropertyValueEnum((UProperty)property, name));
426 }
427 /* }}} */
428 
429 /* {{{ proto int|string IntlChar::foldCase(int|string $codepoint, int $options = IntlChar::FOLD_CASE_DEFAULT) */
430 ZEND_BEGIN_ARG_INFO_EX(foldCase_arginfo, 0, ZEND_RETURN_VALUE, 1)
431 	ZEND_ARG_INFO(0, codepoint)
432 	ZEND_ARG_INFO(0, options)
433 ZEND_END_ARG_INFO();
IC_METHOD(foldCase)434 IC_METHOD(foldCase) {
435 	UChar32 cp, ret;
436 	zval *zcp;
437 	zend_long options = U_FOLD_CASE_DEFAULT;
438 
439 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &options) == FAILURE) ||
440 	    (convert_cp(&cp, zcp) == FAILURE)) {
441 		return;
442 	}
443 
444 	ret = u_foldCase(cp, options);
445 	if (Z_TYPE_P(zcp) == IS_STRING) {
446 		char buffer[5];
447 		int buffer_len = 0;
448 		U8_APPEND_UNSAFE(buffer, buffer_len, ret);
449 		buffer[buffer_len] = 0;
450 		RETURN_STRINGL(buffer, buffer_len);
451 	} else {
452 		RETURN_LONG(ret);
453 	}
454 }
455 /* }}} */
456 
457 /* {{{ proto int IntlChar::digit(int|string $codepoint[, int $radix = 10]) */
458 ZEND_BEGIN_ARG_INFO_EX(digit_arginfo, 0, ZEND_RETURN_VALUE, 1)
459 	ZEND_ARG_INFO(0, codepoint)
460 	ZEND_ARG_INFO(0, radix)
461 ZEND_END_ARG_INFO();
IC_METHOD(digit)462 IC_METHOD(digit) {
463 	UChar32 cp;
464 	zval *zcp;
465 	zend_long radix = 10;
466 	int ret;
467 
468 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &radix) == FAILURE) ||
469 	    (convert_cp(&cp, zcp) == FAILURE)) {
470 		return;
471 	}
472 
473 	ret = u_digit(cp, radix);
474 	if (ret < 0) {
475 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
476 		intl_error_set_custom_msg(NULL, "Invalid digit", 0);
477 		RETURN_FALSE;
478 	}
479 	RETURN_LONG(ret);
480 }
481 /* }}} */
482 
483 /* {{{ proto int IntlChar::forDigit(int $digit[, int $radix = 10]) */
484 ZEND_BEGIN_ARG_INFO_EX(forDigit_arginfo, 0, ZEND_RETURN_VALUE, 1)
485 	ZEND_ARG_INFO(0, digit)
486 	ZEND_ARG_INFO(0, radix)
487 ZEND_END_ARG_INFO();
IC_METHOD(forDigit)488 IC_METHOD(forDigit) {
489 	zend_long digit, radix = 10;
490 
491 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &digit, &radix) == FAILURE) {
492 		return;
493 	}
494 
495 	RETURN_LONG(u_forDigit(digit, radix));
496 }
497 /* }}} */
498 
499 /* {{{ proto array IntlChar::charAge(int|string $codepoint) */
500 ZEND_BEGIN_ARG_INFO_EX(charAge_arginfo, 0, ZEND_RETURN_VALUE, 1)
501 	ZEND_ARG_INFO(0, codepoint)
502 ZEND_END_ARG_INFO();
IC_METHOD(charAge)503 IC_METHOD(charAge) {
504 	UChar32 cp;
505 	zval *zcp;
506 	UVersionInfo version;
507 	int i;
508 
509 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
510 	    (convert_cp(&cp, zcp) == FAILURE)) {
511 		return;
512 	}
513 
514 	u_charAge(cp, version);
515 	array_init(return_value);
516 	for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
517 		add_next_index_long(return_value, version[i]);
518 	}
519 }
520 /* }}} */
521 
522 /* {{{ proto array IntlChar::getUnicodeVersion() */
523 ZEND_BEGIN_ARG_INFO_EX(getUnicodeVersion_arginfo, 0, ZEND_RETURN_VALUE, 0)
524 ZEND_END_ARG_INFO();
IC_METHOD(getUnicodeVersion)525 IC_METHOD(getUnicodeVersion) {
526 	UVersionInfo version;
527 	int i;
528 
529 
530 	u_getUnicodeVersion(version);
531 	array_init(return_value);
532 	for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
533 		add_next_index_long(return_value, version[i]);
534 	}
535 }
536 /* }}} */
537 
538 /* {{{ proto string IntlChar::getFC_NFKC_Closure(int|string $codepoint) */
539 ZEND_BEGIN_ARG_INFO_EX(getFC_NFKC_Closure_arginfo, 0, ZEND_RETURN_VALUE, 1)
540 	ZEND_ARG_INFO(0, codepoint)
541 ZEND_END_ARG_INFO();
IC_METHOD(getFC_NFKC_Closure)542 IC_METHOD(getFC_NFKC_Closure) {
543 	UChar32 cp;
544 	zval *zcp;
545 	UChar *closure;
546 	zend_string *u8str;
547 	int32_t closure_len;
548 	UErrorCode error = U_ZERO_ERROR;
549 
550 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
551 	    (convert_cp(&cp, zcp) == FAILURE)) {
552 		return;
553 	}
554 
555 	closure_len = u_getFC_NFKC_Closure(cp, NULL, 0, &error);
556 	if (closure_len == 0) {
557 		RETURN_EMPTY_STRING();
558 	}
559 	closure = safe_emalloc(sizeof(UChar), closure_len + 1, 0);
560 	error = U_ZERO_ERROR;
561 	closure_len = u_getFC_NFKC_Closure(cp, closure, closure_len, &error);
562 	if (U_FAILURE(error)) {
563 		efree(closure);
564 		INTL_CHECK_STATUS(error, "Failed getting closure");
565 	}
566 
567 	error = U_ZERO_ERROR;
568 	u8str = intl_convert_utf16_to_utf8(closure, closure_len, &error);
569 	INTL_CHECK_STATUS(error, "Failed converting output to UTF8");
570 	efree(closure);
571 	RETVAL_NEW_STR(u8str);
572 }
573 /* }}} */
574 
575 /* {{{ proto bool IntlChar::<name>(int|string $codepoint) */
576 #define IC_BOOL_METHOD_CHAR(name) \
577 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
578 	ZEND_ARG_INFO(0, codepoint) \
579 ZEND_END_ARG_INFO(); \
580 IC_METHOD(name) { \
581 	UChar32 cp; zval *zcp; \
582 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
583 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
584 	RETURN_BOOL(u_##name(cp)); \
585 }
586 IC_BOOL_METHOD_CHAR(isUAlphabetic)
587 IC_BOOL_METHOD_CHAR(isULowercase)
588 IC_BOOL_METHOD_CHAR(isUUppercase)
589 IC_BOOL_METHOD_CHAR(isUWhiteSpace)
590 IC_BOOL_METHOD_CHAR(islower)
591 IC_BOOL_METHOD_CHAR(isupper)
592 IC_BOOL_METHOD_CHAR(istitle)
593 IC_BOOL_METHOD_CHAR(isdigit)
594 IC_BOOL_METHOD_CHAR(isalpha)
595 IC_BOOL_METHOD_CHAR(isalnum)
596 IC_BOOL_METHOD_CHAR(isxdigit)
597 IC_BOOL_METHOD_CHAR(ispunct)
598 IC_BOOL_METHOD_CHAR(isgraph)
599 IC_BOOL_METHOD_CHAR(isblank)
600 IC_BOOL_METHOD_CHAR(isdefined)
601 IC_BOOL_METHOD_CHAR(isspace)
602 IC_BOOL_METHOD_CHAR(isJavaSpaceChar)
603 IC_BOOL_METHOD_CHAR(isWhitespace)
604 IC_BOOL_METHOD_CHAR(iscntrl)
605 IC_BOOL_METHOD_CHAR(isISOControl)
606 IC_BOOL_METHOD_CHAR(isprint)
607 IC_BOOL_METHOD_CHAR(isbase)
608 IC_BOOL_METHOD_CHAR(isMirrored)
609 IC_BOOL_METHOD_CHAR(isIDStart)
610 IC_BOOL_METHOD_CHAR(isIDPart)
611 IC_BOOL_METHOD_CHAR(isIDIgnorable)
612 IC_BOOL_METHOD_CHAR(isJavaIDStart)
613 IC_BOOL_METHOD_CHAR(isJavaIDPart)
614 #undef IC_BOOL_METHOD_CHAR
615 /* }}} */
616 
617 /* {{{ proto int IntlChar::<name>(int|string $codepoint) */
618 #define IC_INT_METHOD_CHAR(name) \
619 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
620 	ZEND_ARG_INFO(0, codepoint) \
621 ZEND_END_ARG_INFO(); \
622 IC_METHOD(name) { \
623 	UChar32 cp; zval *zcp; \
624 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
625 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
626 	RETURN_LONG(u_##name(cp)); \
627 }
628 IC_INT_METHOD_CHAR(charDirection)
629 IC_INT_METHOD_CHAR(charType)
630 IC_INT_METHOD_CHAR(getCombiningClass)
631 IC_INT_METHOD_CHAR(charDigitValue)
632 #undef IC_INT_METHOD_CHAR
633 /* }}} */
634 
635 /* {{{ proto int|string IntlChar::<name>(int|string $codepoint)
636  * Returns a utf-8 character if codepoint was passed as a utf-8 sequence
637  * Returns an int otherwise
638  */
639 #define IC_CHAR_METHOD_CHAR(name) \
640 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
641 	ZEND_ARG_INFO(0, codepoint) \
642 ZEND_END_ARG_INFO(); \
643 IC_METHOD(name) { \
644 	UChar32 cp, ret; zval *zcp; \
645 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
646 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
647 	ret = u_##name(cp); \
648 	if (Z_TYPE_P(zcp) == IS_STRING) { \
649 		char buffer[5]; \
650 		int buffer_len = 0; \
651 		U8_APPEND_UNSAFE(buffer, buffer_len, ret); \
652 		buffer[buffer_len] = 0; \
653 		RETURN_STRINGL(buffer, buffer_len); \
654 	} else { \
655 		RETURN_LONG(ret); \
656 	} \
657 }
658 IC_CHAR_METHOD_CHAR(charMirror)
659 IC_CHAR_METHOD_CHAR(tolower)
660 IC_CHAR_METHOD_CHAR(toupper)
661 IC_CHAR_METHOD_CHAR(totitle)
662 #if U_ICU_VERSION_MAJOR_NUM >= 52
663 IC_CHAR_METHOD_CHAR(getBidiPairedBracket)
664 #endif /* ICU >= 52 */
665 #undef IC_CHAR_METHOD_CHAR
666 /* }}} */
667 
668 static zend_function_entry intlchar_methods[] = {
669 #define IC_ME(mname) PHP_ME(IntlChar, mname, mname##_arginfo, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
670 	IC_ME(chr)
671 	IC_ME(ord)
672 	IC_ME(hasBinaryProperty)
673 	IC_ME(isUAlphabetic)
674 	IC_ME(isULowercase)
675 	IC_ME(isUUppercase)
676 	IC_ME(isUWhiteSpace)
677 	IC_ME(getIntPropertyValue)
678 	IC_ME(getIntPropertyMinValue)
679 	IC_ME(getIntPropertyMaxValue)
680 	IC_ME(getNumericValue)
681 	IC_ME(islower)
682 	IC_ME(isupper)
683 	IC_ME(istitle)
684 	IC_ME(isdigit)
685 	IC_ME(isalpha)
686 	IC_ME(isalnum)
687 	IC_ME(isxdigit)
688 	IC_ME(ispunct)
689 	IC_ME(isgraph)
690 	IC_ME(isblank)
691 	IC_ME(isdefined)
692 	IC_ME(isspace)
693 	IC_ME(isJavaSpaceChar)
694 	IC_ME(isWhitespace)
695 	IC_ME(iscntrl)
696 	IC_ME(isISOControl)
697 	IC_ME(isprint)
698 	IC_ME(isbase)
699 	IC_ME(charDirection)
700 	IC_ME(isMirrored)
701 	IC_ME(charMirror)
702 #if U_ICU_VERSION_MAJOR_NUM >= 52
703 	IC_ME(getBidiPairedBracket)
704 #endif /* ICU >= 52 */
705 	IC_ME(charType)
706 	IC_ME(enumCharTypes)
707 	IC_ME(getCombiningClass)
708 	IC_ME(charDigitValue)
709 	IC_ME(getBlockCode)
710 	IC_ME(charName)
711 	IC_ME(charFromName)
712 	IC_ME(enumCharNames)
713 	IC_ME(getPropertyName)
714 	IC_ME(getPropertyEnum)
715 	IC_ME(getPropertyValueName)
716 	IC_ME(getPropertyValueEnum)
717 	IC_ME(isIDStart)
718 	IC_ME(isIDPart)
719 	IC_ME(isIDIgnorable)
720 	IC_ME(isJavaIDStart)
721 	IC_ME(isJavaIDPart)
722 	IC_ME(tolower)
723 	IC_ME(toupper)
724 	IC_ME(totitle)
725 	IC_ME(foldCase)
726 	IC_ME(digit)
727 	IC_ME(forDigit)
728 	IC_ME(charAge)
729 	IC_ME(getUnicodeVersion)
730 	IC_ME(getFC_NFKC_Closure)
731 #undef IC_ME
732 	PHP_FE_END
733 };
734 
php_uchar_minit(INIT_FUNC_ARGS)735 int php_uchar_minit(INIT_FUNC_ARGS) {
736 	zend_class_entry tmp, *ce;
737 
738 	INIT_CLASS_ENTRY(tmp, "IntlChar", intlchar_methods);
739 	ce = zend_register_internal_class(&tmp);
740 
741 #define IC_CONSTL(name, val) \
742 	zend_declare_class_constant_long(ce, name, strlen(name), val);
743 
744 	zend_declare_class_constant_string(ce, "UNICODE_VERSION", sizeof("UNICODE_VERISON")-1, U_UNICODE_VERSION);
745 	IC_CONSTL("CODEPOINT_MIN", UCHAR_MIN_VALUE)
746 	IC_CONSTL("CODEPOINT_MAX", UCHAR_MAX_VALUE)
747 	IC_CONSTL("FOLD_CASE_DEFAULT", U_FOLD_CASE_DEFAULT)
748 	IC_CONSTL("FOLD_CASE_EXCLUDE_SPECIAL_I", U_FOLD_CASE_EXCLUDE_SPECIAL_I)
749 	zend_declare_class_constant_double(ce, "NO_NUMERIC_VALUE", sizeof("NO_NUMERIC_VALUE")-1, U_NO_NUMERIC_VALUE);
750 
751 	/* All enums used by the uchar APIs.  There are a LOT of them,
752 	 * so they're separated out into include files,
753 	 * leaving this source file for actual implementation.
754 	 */
755 #define UPROPERTY(name) IC_CONSTL("PROPERTY_" #name, UCHAR_##name)
756 #include "uproperty-enum.h"
757 #undef UPROPERTY
758 
759 #define UCHARCATEGORY(name) IC_CONSTL("CHAR_CATEGORY_" #name, U_##name)
760 #include "ucharcategory-enum.h"
761 #undef UCHARCATEGORY
762 
763 #define UCHARDIRECTION(name) IC_CONSTL("CHAR_DIRECTION_" #name, U_##name)
764 #include "uchardirection-enum.h"
765 #undef UCHARDIRECTION
766 
767 #define UBLOCKCODE(name) IC_CONSTL("BLOCK_CODE_" #name, UBLOCK_##name)
768 #include "ublockcode-enum.h"
769 #undef UBLOCKCODE
770 
771 	/* Smaller, self-destribing enums */
772 #define UOTHER(name) IC_CONSTL(#name, U_##name)
773 #include "uother-enum.h"
774 #undef UOTHER
775 
776 #undef IC_CONSTL
777 #undef IC_CONSTS
778 
779 	return SUCCESS;
780 }
781