xref: /PHP-7.3/ext/intl/uchar/uchar.c (revision 1c850bfc)
1 #include "uchar.h"
2 #include "intl_data.h"
3 #include "intl_convert.h"
4 
5 #include <unicode/uchar.h>
6 #if U_ICU_VERSION_MAJOR_NUM >= 49
7 #include <unicode/utf8.h>
8 #endif
9 
10 #define IC_METHOD(mname) PHP_METHOD(IntlChar, mname)
11 
/* Normalizes a userland codepoint argument into a UChar32.
 *
 * zcp may be an integer (used as-is) or a string, which must decode to
 * exactly one UTF-8 sequence spanning the whole string.  Any other type
 * is rejected.  The resulting value must lie within
 * UCHAR_MIN_VALUE..UCHAR_MAX_VALUE.
 *
 * On success the codepoint is stored in *pcp and SUCCESS is returned.
 * On failure the global intl error code/message are set, *pcp is left
 * untouched and FAILURE is returned.
 */
static inline int convert_cp(UChar32* pcp, zval *zcp) {
	zend_long codepoint = -1;

	switch (Z_TYPE_P(zcp)) {
		case IS_LONG:
			codepoint = Z_LVAL_P(zcp);
			break;

		case IS_STRING: {
			size_t byte_count = Z_STRLEN_P(zcp);
			int32_t offset = 0;

			if (ZEND_SIZE_T_INT_OVFL(byte_count)) {
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
				return FAILURE;
			}

			/* Decode the first sequence; offset ends up just past it */
			U8_NEXT(Z_STRVAL_P(zcp), offset, byte_count, codepoint);
			if ((size_t)offset != byte_count) {
				/* Empty string, or bytes left over after the first sequence */
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
				return FAILURE;
			}
			break;
		}

		default:
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point.  Must be either integer or UTF-8 sequence.", 0);
			return FAILURE;
	}

	if ((codepoint < UCHAR_MIN_VALUE) || (codepoint > UCHAR_MAX_VALUE)) {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
		return FAILURE;
	}

	*pcp = (UChar32)codepoint;
	return SUCCESS;
}
46 
47 /* {{{ proto string IntlChar::chr(int|string $codepoint)
48  * Converts a numeric codepoint to UTF-8
49  * Acts as an identify function when given a valid UTF-8 encoded codepoint
50  */
51 ZEND_BEGIN_ARG_INFO_EX(chr_arginfo, 0, ZEND_RETURN_VALUE, 1)
52 	ZEND_ARG_INFO(0, codepoint)
53 ZEND_END_ARG_INFO();
IC_METHOD(chr)54 IC_METHOD(chr) {
55 	UChar32 cp;
56 	zval *zcp;
57 	char buffer[5];
58 	int buffer_len = 0;
59 
60 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
61 	    (convert_cp(&cp, zcp) == FAILURE)) {
62 		return;
63 	}
64 
65 	/* We can use unsafe because we know the codepoint is in valid range
66 	 * and that 4 bytes is enough for any unicode point
67 	 */
68 	U8_APPEND_UNSAFE(buffer, buffer_len, cp);
69 	buffer[buffer_len] = 0;
70 	RETURN_STRINGL(buffer, buffer_len);
71 }
72 /* }}} */
73 
74 /* {{{ proto int IntlChar::ord(int|string $character)
75  * Converts a UTf-8 encoded codepoint to its integer U32 value
76  * Acts as an identity function when passed a valid integer codepoint
77  */
78 ZEND_BEGIN_ARG_INFO_EX(ord_arginfo, 0, ZEND_RETURN_VALUE, 1)
79 	ZEND_ARG_INFO(0, character)
80 ZEND_END_ARG_INFO();
IC_METHOD(ord)81 IC_METHOD(ord) {
82 	UChar32 cp;
83 	zval *zcp;
84 
85 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
86 	    (convert_cp(&cp, zcp) == FAILURE)) {
87 		return;
88 	}
89 
90 	RETURN_LONG(cp);
91 }
92 /* }}} */
93 
94 /* {{{ proto bool IntlChar::hasBinaryProperty(int|string $codepoint, int $property) */
95 ZEND_BEGIN_ARG_INFO_EX(hasBinaryProperty_arginfo, 0, ZEND_RETURN_VALUE, 2)
96 	ZEND_ARG_INFO(0, codepoint)
97 	ZEND_ARG_INFO(0, property)
98 ZEND_END_ARG_INFO();
IC_METHOD(hasBinaryProperty)99 IC_METHOD(hasBinaryProperty) {
100 	UChar32 cp;
101 	zend_long prop;
102 	zval *zcp;
103 
104 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
105 	    (convert_cp(&cp, zcp) == FAILURE)) {
106 		return;
107 	}
108 
109 	RETURN_BOOL(u_hasBinaryProperty(cp, (UProperty)prop));
110 }
111 /* }}} */
112 
113 /* {{{ proto int IntlChar::getIntPropertyValue(int|string $codepoint, int $property) */
114 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyValue_arginfo, 0, ZEND_RETURN_VALUE, 2)
115 	ZEND_ARG_INFO(0, codepoint)
116 	ZEND_ARG_INFO(0, property)
117 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyValue)118 IC_METHOD(getIntPropertyValue) {
119 	UChar32 cp;
120 	zend_long prop;
121 	zval *zcp;
122 
123 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
124 	    (convert_cp(&cp, zcp) == FAILURE)) {
125 		return;
126 	}
127 
128 	RETURN_LONG(u_getIntPropertyValue(cp, (UProperty)prop));
129 }
130 /* }}} */
131 
132 /* {{{ proto int IntlChar::getIntPropertyMinValue(int $property) */
133 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMinValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
134 	ZEND_ARG_INFO(0, property)
135 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyMinValue)136 IC_METHOD(getIntPropertyMinValue) {
137 	zend_long prop;
138 
139 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
140 		return;
141 	}
142 
143 	RETURN_LONG(u_getIntPropertyMinValue((UProperty)prop));
144 }
145 /* }}} */
146 
147 /* {{{ proto int IntlChar::getIntPropertyMaxValue(int $property) */
148 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMaxValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
149 	ZEND_ARG_INFO(0, property)
150 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyMaxValue)151 IC_METHOD(getIntPropertyMaxValue) {
152 	zend_long prop;
153 
154 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
155 		return;
156 	}
157 
158 	RETURN_LONG(u_getIntPropertyMaxValue((UProperty)prop));
159 }
160 /* }}} */
161 
162 /* {{{ proto float IntlChar::getNumericValue(int|string $codepoint) */
163 ZEND_BEGIN_ARG_INFO_EX(getNumericValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
164 	ZEND_ARG_INFO(0, codepoint)
165 ZEND_END_ARG_INFO();
IC_METHOD(getNumericValue)166 IC_METHOD(getNumericValue) {
167 	UChar32 cp;
168 	zval *zcp;
169 
170 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
171 	    (convert_cp(&cp, zcp) == FAILURE)) {
172 		return;
173 	}
174 
175 	RETURN_DOUBLE(u_getNumericValue(cp));
176 }
177 /* }}} */
178 
179 /* {{{ proto void IntlChar::enumCharTypes(callable $callback) */
180 ZEND_BEGIN_ARG_INFO_EX(enumCharTypes_arginfo, 0, ZEND_RETURN_VALUE, 0)
181 	ZEND_ARG_INFO(0, callback)
182 ZEND_END_ARG_INFO();
183 typedef struct _enumCharType_data {
184 	zend_fcall_info fci;
185 	zend_fcall_info_cache fci_cache;
186 } enumCharType_data;
enumCharType_callback(enumCharType_data * context,UChar32 start,UChar32 limit,UCharCategory type)187 static UBool enumCharType_callback(enumCharType_data *context,
188                                    UChar32 start, UChar32 limit,
189                                    UCharCategory type) {
190 	zval retval;
191 	zval args[3];
192 
193 	ZVAL_NULL(&retval);
194 	/* Note that $start is INclusive, while $limit is EXclusive
195 	 * Therefore (0, 32, 15) means CPs 0..31 are of type 15
196 	 */
197 	ZVAL_LONG(&args[0], start);
198 	ZVAL_LONG(&args[1], limit);
199 	ZVAL_LONG(&args[2], type);
200 
201 	context->fci.retval = &retval;
202 	context->fci.param_count = 3;
203 	context->fci.params = args;
204 
205 	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
206 		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
207 		intl_errors_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
208 		zval_ptr_dtor(&retval);
209 		return 0;
210 	}
211 	zval_ptr_dtor(&retval);
212 	return 1;
213 }
IC_METHOD(enumCharTypes)214 IC_METHOD(enumCharTypes) {
215 	enumCharType_data context;
216 
217 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "f", &context.fci, &context.fci_cache) == FAILURE) {
218 		return;
219 	}
220 	u_enumCharTypes((UCharEnumTypeRange*)enumCharType_callback, &context);
221 }
222 /* }}} */
223 
224 /* {{{ proto int IntlChar::getBlockCode(int|string $codepoint) */
225 ZEND_BEGIN_ARG_INFO_EX(getBlockCode_arginfo, 0, ZEND_RETURN_VALUE, 1)
226 	ZEND_ARG_INFO(0, codepoint)
ZEND_END_ARG_INFO()227 ZEND_END_ARG_INFO()
228 IC_METHOD(getBlockCode) {
229 	UChar32 cp;
230 	zval *zcp;
231 
232 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
233 	    (convert_cp(&cp, zcp) == FAILURE)) {
234 		return;
235 	}
236 
237 	RETURN_LONG(ublock_getCode(cp));
238 }
239 /* }}} */
240 
241 /* {{{ proto string IntlChar::charName(int|string $codepoint, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
242 ZEND_BEGIN_ARG_INFO_EX(charName_arginfo, 0, ZEND_RETURN_VALUE, 1)
243 	ZEND_ARG_INFO(0, codepoint)
244 	ZEND_ARG_INFO(0, nameChoice)
ZEND_END_ARG_INFO()245 ZEND_END_ARG_INFO()
246 IC_METHOD(charName) {
247 	UChar32 cp;
248 	zval *zcp;
249 	UErrorCode error = U_ZERO_ERROR;
250 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
251 	zend_string *buffer = NULL;
252 	int32_t buffer_len;
253 
254 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &nameChoice) == FAILURE) ||
255 	    (convert_cp(&cp, zcp) == FAILURE)) {
256 		RETURN_NULL();
257 	}
258 
259 	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, NULL, 0, &error);
260 	buffer = zend_string_alloc(buffer_len, 0);
261 	error = U_ZERO_ERROR;
262 	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, ZSTR_VAL(buffer), ZSTR_LEN(buffer) + 1, &error);
263 	if (U_FAILURE(error)) {
264 		zend_string_efree(buffer);
265 		INTL_CHECK_STATUS_OR_NULL(error, "Failure getting character name");
266 	}
267 	RETURN_NEW_STR(buffer);
268 }
269 /* }}} */
270 
271 /* {{{ proto int IntlChar::charFromName(string $characterName, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
272 ZEND_BEGIN_ARG_INFO_EX(charFromName_arginfo, 0, ZEND_RETURN_VALUE, 1)
273 	ZEND_ARG_INFO(0, characterName)
274 	ZEND_ARG_INFO(0, nameChoice)
ZEND_END_ARG_INFO()275 ZEND_END_ARG_INFO()
276 IC_METHOD(charFromName) {
277 	char *name;
278 	size_t name_len;
279 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
280 	UChar32 ret;
281 	UErrorCode error = U_ZERO_ERROR;
282 
283 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &name, &name_len, &nameChoice) == FAILURE) {
284 		RETURN_NULL();
285 	}
286 
287 	ret = u_charFromName((UCharNameChoice)nameChoice, name, &error);
288 	INTL_CHECK_STATUS_OR_NULL(error, NULL);
289 	RETURN_LONG(ret);
290 }
291 /* }}} */
292 
293 /* {{{ void void IntlChar::enumCharNames(int|string $start, int|string $limit, callable $callback, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
294 ZEND_BEGIN_ARG_INFO_EX(enumCharNames_arginfo, 0, ZEND_RETURN_VALUE, 3)
295 	ZEND_ARG_INFO(0, start)
296 	ZEND_ARG_INFO(0, limit)
297 	ZEND_ARG_INFO(0, callback)
298 	ZEND_ARG_INFO(0, nameChoice)
299 ZEND_END_ARG_INFO();
300 typedef struct _enumCharNames_data {
301 	zend_fcall_info fci;
302 	zend_fcall_info_cache fci_cache;
303 } enumCharNames_data;
enumCharNames_callback(enumCharNames_data * context,UChar32 code,UCharNameChoice nameChoice,const char * name,int32_t length)304 static UBool enumCharNames_callback(enumCharNames_data *context,
305                                     UChar32 code, UCharNameChoice nameChoice,
306                                     const char *name, int32_t length) {
307 	zval retval;
308 	zval args[3];
309 
310 	ZVAL_NULL(&retval);
311 	ZVAL_LONG(&args[0], code);
312 	ZVAL_LONG(&args[1], nameChoice);
313 	ZVAL_STRINGL(&args[2], name, length);
314 
315 	context->fci.retval = &retval;
316 	context->fci.param_count = 3;
317 	context->fci.params = args;
318 
319 	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
320 		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
321 		intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
322 		zval_ptr_dtor(&retval);
323 		zval_ptr_dtor_str(&args[2]);
324 		return 0;
325 	}
326 	zval_ptr_dtor(&retval);
327 	zval_ptr_dtor_str(&args[2]);
328 	return 1;
329 }
IC_METHOD(enumCharNames)330 IC_METHOD(enumCharNames) {
331 	UChar32 start, limit;
332 	zval *zstart, *zlimit;
333 	enumCharNames_data context;
334 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
335 	UErrorCode error = U_ZERO_ERROR;
336 
337 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zzf|l", &zstart, &zlimit, &context.fci, &context.fci_cache, &nameChoice) == FAILURE) ||
338 	    (convert_cp(&start, zstart) == FAILURE) ||
339 	    (convert_cp(&limit, zlimit) == FAILURE)) {
340 		return;
341 	}
342 
343 	u_enumCharNames(start, limit, (UEnumCharNamesFn*)enumCharNames_callback, &context, nameChoice, &error);
344 	INTL_CHECK_STATUS(error, NULL);
345 }
346 /* }}} */
347 
348 /* {{{ proto string IntlChar::getPropertyName(int $property, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
349 ZEND_BEGIN_ARG_INFO_EX(getPropertyName_arginfo, 0, ZEND_RETURN_VALUE, 1)
350 	ZEND_ARG_INFO(0, property)
351 	ZEND_ARG_INFO(0, nameChoice)
352 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyName)353 IC_METHOD(getPropertyName) {
354 	zend_long property;
355 	zend_long nameChoice = U_LONG_PROPERTY_NAME;
356 	const char *ret;
357 
358 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &property, &nameChoice) == FAILURE) {
359 		return;
360 	}
361 
362 	ret = u_getPropertyName((UProperty)property, (UPropertyNameChoice)nameChoice);
363 	if (ret) {
364 		RETURN_STRING(ret);
365 	} else {
366 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
367 		intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
368 		RETURN_FALSE;
369 	}
370 }
371 /* }}} */
372 
373 /* {{{ proto int IntlChar::getPropertyEnum(string $alias) */
374 ZEND_BEGIN_ARG_INFO_EX(getPropertyEnum_arginfo, 0, ZEND_RETURN_VALUE, 1)
375 	ZEND_ARG_INFO(0, alias)
376 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyEnum)377 IC_METHOD(getPropertyEnum) {
378 	char *alias;
379 	size_t alias_len;
380 
381 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &alias, &alias_len) == FAILURE) {
382 		return;
383 	}
384 
385 	RETURN_LONG(u_getPropertyEnum(alias));
386 }
387 /* }}} */
388 
389 /* {{{ proto string IntlChar::getPropertyValueName(int $property, int $value[, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
390 ZEND_BEGIN_ARG_INFO_EX(getPropertyValueName_arginfo, 0, ZEND_RETURN_VALUE, 2)
391 	ZEND_ARG_INFO(0, property)
392 	ZEND_ARG_INFO(0, value)
393 	ZEND_ARG_INFO(0, nameChoice)
394 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyValueName)395 IC_METHOD(getPropertyValueName) {
396 	zend_long property, value, nameChoice = U_LONG_PROPERTY_NAME;
397 	const char *ret;
398 
399 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ll|l", &property, &value, &nameChoice) == FAILURE) {
400 		return;
401 	}
402 
403 	ret = u_getPropertyValueName((UProperty)property, value, (UPropertyNameChoice)nameChoice);
404 	if (ret) {
405 		RETURN_STRING(ret);
406 	} else {
407 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
408 		intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
409 		RETURN_FALSE;
410 	}
411 }
412 /* }}} */
413 
414 /* {{{ proto int IntlChar::getPropertyValueEnum(int $property, string $name) */
415 ZEND_BEGIN_ARG_INFO_EX(getPropertyValueEnum_arginfo, 0, ZEND_RETURN_VALUE, 2)
416 	ZEND_ARG_INFO(0, property)
417 	ZEND_ARG_INFO(0, name)
418 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyValueEnum)419 IC_METHOD(getPropertyValueEnum) {
420 	zend_long property;
421 	char *name;
422 	size_t name_len;
423 
424 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ls", &property, &name, &name_len) == FAILURE) {
425 		return;
426 	}
427 
428 	RETURN_LONG(u_getPropertyValueEnum((UProperty)property, name));
429 }
430 /* }}} */
431 
432 /* {{{ proto int|string IntlChar::foldCase(int|string $codepoint, int $options = IntlChar::FOLD_CASE_DEFAULT) */
433 ZEND_BEGIN_ARG_INFO_EX(foldCase_arginfo, 0, ZEND_RETURN_VALUE, 1)
434 	ZEND_ARG_INFO(0, codepoint)
435 	ZEND_ARG_INFO(0, options)
436 ZEND_END_ARG_INFO();
IC_METHOD(foldCase)437 IC_METHOD(foldCase) {
438 	UChar32 cp, ret;
439 	zval *zcp;
440 	zend_long options = U_FOLD_CASE_DEFAULT;
441 
442 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &options) == FAILURE) ||
443 	    (convert_cp(&cp, zcp) == FAILURE)) {
444 		return;
445 	}
446 
447 	ret = u_foldCase(cp, options);
448 	if (Z_TYPE_P(zcp) == IS_STRING) {
449 		char buffer[5];
450 		int buffer_len = 0;
451 		U8_APPEND_UNSAFE(buffer, buffer_len, ret);
452 		buffer[buffer_len] = 0;
453 		RETURN_STRINGL(buffer, buffer_len);
454 	} else {
455 		RETURN_LONG(ret);
456 	}
457 }
458 /* }}} */
459 
460 /* {{{ proto int IntlChar::digit(int|string $codepoint[, int $radix = 10]) */
461 ZEND_BEGIN_ARG_INFO_EX(digit_arginfo, 0, ZEND_RETURN_VALUE, 1)
462 	ZEND_ARG_INFO(0, codepoint)
463 	ZEND_ARG_INFO(0, radix)
464 ZEND_END_ARG_INFO();
IC_METHOD(digit)465 IC_METHOD(digit) {
466 	UChar32 cp;
467 	zval *zcp;
468 	zend_long radix = 10;
469 	int ret;
470 
471 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &radix) == FAILURE) ||
472 	    (convert_cp(&cp, zcp) == FAILURE)) {
473 		return;
474 	}
475 
476 	ret = u_digit(cp, radix);
477 	if (ret < 0) {
478 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
479 		intl_error_set_custom_msg(NULL, "Invalid digit", 0);
480 		RETURN_FALSE;
481 	}
482 	RETURN_LONG(ret);
483 }
484 /* }}} */
485 
486 /* {{{ proto int IntlChar::forDigit(int $digit[, int $radix = 10]) */
487 ZEND_BEGIN_ARG_INFO_EX(forDigit_arginfo, 0, ZEND_RETURN_VALUE, 1)
488 	ZEND_ARG_INFO(0, digit)
489 	ZEND_ARG_INFO(0, radix)
490 ZEND_END_ARG_INFO();
IC_METHOD(forDigit)491 IC_METHOD(forDigit) {
492 	zend_long digit, radix = 10;
493 
494 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &digit, &radix) == FAILURE) {
495 		return;
496 	}
497 
498 	RETURN_LONG(u_forDigit(digit, radix));
499 }
500 /* }}} */
501 
502 /* {{{ proto array IntlChar::charAge(int|string $codepoint) */
503 ZEND_BEGIN_ARG_INFO_EX(charAge_arginfo, 0, ZEND_RETURN_VALUE, 1)
504 	ZEND_ARG_INFO(0, codepoint)
505 ZEND_END_ARG_INFO();
IC_METHOD(charAge)506 IC_METHOD(charAge) {
507 	UChar32 cp;
508 	zval *zcp;
509 	UVersionInfo version;
510 	int i;
511 
512 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
513 	    (convert_cp(&cp, zcp) == FAILURE)) {
514 		return;
515 	}
516 
517 	u_charAge(cp, version);
518 	array_init(return_value);
519 	for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
520 		add_next_index_long(return_value, version[i]);
521 	}
522 }
523 /* }}} */
524 
525 /* {{{ proto array IntlChar::getUnicodeVersion() */
526 ZEND_BEGIN_ARG_INFO_EX(getUnicodeVersion_arginfo, 0, ZEND_RETURN_VALUE, 0)
527 ZEND_END_ARG_INFO();
IC_METHOD(getUnicodeVersion)528 IC_METHOD(getUnicodeVersion) {
529 	UVersionInfo version;
530 	int i;
531 
532 
533 	u_getUnicodeVersion(version);
534 	array_init(return_value);
535 	for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
536 		add_next_index_long(return_value, version[i]);
537 	}
538 }
539 /* }}} */
540 
541 /* {{{ proto string IntlChar::getFC_NFKC_Closure(int|string $codepoint) */
542 ZEND_BEGIN_ARG_INFO_EX(getFC_NFKC_Closure_arginfo, 0, ZEND_RETURN_VALUE, 1)
543 	ZEND_ARG_INFO(0, codepoint)
544 ZEND_END_ARG_INFO();
IC_METHOD(getFC_NFKC_Closure)545 IC_METHOD(getFC_NFKC_Closure) {
546 	UChar32 cp;
547 	zval *zcp;
548 	UChar *closure;
549 	zend_string *u8str;
550 	int32_t closure_len;
551 	UErrorCode error = U_ZERO_ERROR;
552 
553 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
554 	    (convert_cp(&cp, zcp) == FAILURE)) {
555 		return;
556 	}
557 
558 	closure_len = u_getFC_NFKC_Closure(cp, NULL, 0, &error);
559 	if (closure_len == 0) {
560 		RETURN_EMPTY_STRING();
561 	}
562 	closure = safe_emalloc(sizeof(UChar), closure_len + 1, 0);
563 	error = U_ZERO_ERROR;
564 	closure_len = u_getFC_NFKC_Closure(cp, closure, closure_len, &error);
565 	if (U_FAILURE(error)) {
566 		efree(closure);
567 		INTL_CHECK_STATUS(error, "Failed getting closure");
568 	}
569 
570 	error = U_ZERO_ERROR;
571 	u8str = intl_convert_utf16_to_utf8(closure, closure_len, &error);
572 	INTL_CHECK_STATUS(error, "Failed converting output to UTF8");
573 	efree(closure);
574 	RETVAL_NEW_STR(u8str);
575 }
576 /* }}} */
577 
578 /* {{{ proto bool IntlChar::<name>(int|string $codepoint) */
579 #define IC_BOOL_METHOD_CHAR(name) \
580 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
581 	ZEND_ARG_INFO(0, codepoint) \
582 ZEND_END_ARG_INFO(); \
583 IC_METHOD(name) { \
584 	UChar32 cp; zval *zcp; \
585 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
586 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
587 	RETURN_BOOL(u_##name(cp)); \
588 }
589 IC_BOOL_METHOD_CHAR(isUAlphabetic)
590 IC_BOOL_METHOD_CHAR(isULowercase)
591 IC_BOOL_METHOD_CHAR(isUUppercase)
592 IC_BOOL_METHOD_CHAR(isUWhiteSpace)
593 IC_BOOL_METHOD_CHAR(islower)
594 IC_BOOL_METHOD_CHAR(isupper)
595 IC_BOOL_METHOD_CHAR(istitle)
596 IC_BOOL_METHOD_CHAR(isdigit)
597 IC_BOOL_METHOD_CHAR(isalpha)
598 IC_BOOL_METHOD_CHAR(isalnum)
599 IC_BOOL_METHOD_CHAR(isxdigit)
600 IC_BOOL_METHOD_CHAR(ispunct)
601 IC_BOOL_METHOD_CHAR(isgraph)
602 IC_BOOL_METHOD_CHAR(isblank)
603 IC_BOOL_METHOD_CHAR(isdefined)
604 IC_BOOL_METHOD_CHAR(isspace)
605 IC_BOOL_METHOD_CHAR(isJavaSpaceChar)
606 IC_BOOL_METHOD_CHAR(isWhitespace)
607 IC_BOOL_METHOD_CHAR(iscntrl)
608 IC_BOOL_METHOD_CHAR(isISOControl)
609 IC_BOOL_METHOD_CHAR(isprint)
610 IC_BOOL_METHOD_CHAR(isbase)
611 IC_BOOL_METHOD_CHAR(isMirrored)
612 IC_BOOL_METHOD_CHAR(isIDStart)
613 IC_BOOL_METHOD_CHAR(isIDPart)
614 IC_BOOL_METHOD_CHAR(isIDIgnorable)
615 IC_BOOL_METHOD_CHAR(isJavaIDStart)
616 IC_BOOL_METHOD_CHAR(isJavaIDPart)
617 #undef IC_BOOL_METHOD_CHAR
618 /* }}} */
619 
620 /* {{{ proto int IntlChar::<name>(int|string $codepoint) */
621 #define IC_INT_METHOD_CHAR(name) \
622 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
623 	ZEND_ARG_INFO(0, codepoint) \
624 ZEND_END_ARG_INFO(); \
625 IC_METHOD(name) { \
626 	UChar32 cp; zval *zcp; \
627 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
628 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
629 	RETURN_LONG(u_##name(cp)); \
630 }
631 IC_INT_METHOD_CHAR(charDirection)
632 IC_INT_METHOD_CHAR(charType)
633 IC_INT_METHOD_CHAR(getCombiningClass)
634 IC_INT_METHOD_CHAR(charDigitValue)
635 #undef IC_INT_METHOD_CHAR
636 /* }}} */
637 
638 /* {{{ proto int|string IntlChar::<name>(int|string $codepoint)
639  * Returns a utf-8 character if codepoint was passed as a utf-8 sequence
640  * Returns an int otherwise
641  */
642 #define IC_CHAR_METHOD_CHAR(name) \
643 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
644 	ZEND_ARG_INFO(0, codepoint) \
645 ZEND_END_ARG_INFO(); \
646 IC_METHOD(name) { \
647 	UChar32 cp, ret; zval *zcp; \
648 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
649 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
650 	ret = u_##name(cp); \
651 	if (Z_TYPE_P(zcp) == IS_STRING) { \
652 		char buffer[5]; \
653 		int buffer_len = 0; \
654 		U8_APPEND_UNSAFE(buffer, buffer_len, ret); \
655 		buffer[buffer_len] = 0; \
656 		RETURN_STRINGL(buffer, buffer_len); \
657 	} else { \
658 		RETURN_LONG(ret); \
659 	} \
660 }
661 IC_CHAR_METHOD_CHAR(charMirror)
662 IC_CHAR_METHOD_CHAR(tolower)
663 IC_CHAR_METHOD_CHAR(toupper)
664 IC_CHAR_METHOD_CHAR(totitle)
665 #if U_ICU_VERSION_MAJOR_NUM >= 52
666 IC_CHAR_METHOD_CHAR(getBidiPairedBracket)
667 #endif /* ICU >= 52 */
668 #undef IC_CHAR_METHOD_CHAR
669 /* }}} */
670 
671 static const zend_function_entry intlchar_methods[] = {
672 #define IC_ME(mname) PHP_ME(IntlChar, mname, mname##_arginfo, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
673 	IC_ME(chr)
674 	IC_ME(ord)
675 	IC_ME(hasBinaryProperty)
676 	IC_ME(isUAlphabetic)
677 	IC_ME(isULowercase)
678 	IC_ME(isUUppercase)
679 	IC_ME(isUWhiteSpace)
680 	IC_ME(getIntPropertyValue)
681 	IC_ME(getIntPropertyMinValue)
682 	IC_ME(getIntPropertyMaxValue)
683 	IC_ME(getNumericValue)
684 	IC_ME(islower)
685 	IC_ME(isupper)
686 	IC_ME(istitle)
687 	IC_ME(isdigit)
688 	IC_ME(isalpha)
689 	IC_ME(isalnum)
690 	IC_ME(isxdigit)
691 	IC_ME(ispunct)
692 	IC_ME(isgraph)
693 	IC_ME(isblank)
694 	IC_ME(isdefined)
695 	IC_ME(isspace)
696 	IC_ME(isJavaSpaceChar)
697 	IC_ME(isWhitespace)
698 	IC_ME(iscntrl)
699 	IC_ME(isISOControl)
700 	IC_ME(isprint)
701 	IC_ME(isbase)
702 	IC_ME(charDirection)
703 	IC_ME(isMirrored)
704 	IC_ME(charMirror)
705 #if U_ICU_VERSION_MAJOR_NUM >= 52
706 	IC_ME(getBidiPairedBracket)
707 #endif /* ICU >= 52 */
708 	IC_ME(charType)
709 	IC_ME(enumCharTypes)
710 	IC_ME(getCombiningClass)
711 	IC_ME(charDigitValue)
712 	IC_ME(getBlockCode)
713 	IC_ME(charName)
714 	IC_ME(charFromName)
715 	IC_ME(enumCharNames)
716 	IC_ME(getPropertyName)
717 	IC_ME(getPropertyEnum)
718 	IC_ME(getPropertyValueName)
719 	IC_ME(getPropertyValueEnum)
720 	IC_ME(isIDStart)
721 	IC_ME(isIDPart)
722 	IC_ME(isIDIgnorable)
723 	IC_ME(isJavaIDStart)
724 	IC_ME(isJavaIDPart)
725 	IC_ME(tolower)
726 	IC_ME(toupper)
727 	IC_ME(totitle)
728 	IC_ME(foldCase)
729 	IC_ME(digit)
730 	IC_ME(forDigit)
731 	IC_ME(charAge)
732 	IC_ME(getUnicodeVersion)
733 	IC_ME(getFC_NFKC_Closure)
734 #undef IC_ME
735 	PHP_FE_END
736 };
737 
php_uchar_minit(INIT_FUNC_ARGS)738 int php_uchar_minit(INIT_FUNC_ARGS) {
739 	zend_class_entry tmp, *ce;
740 
741 	INIT_CLASS_ENTRY(tmp, "IntlChar", intlchar_methods);
742 	ce = zend_register_internal_class(&tmp);
743 
744 #define IC_CONSTL(name, val) \
745 	zend_declare_class_constant_long(ce, name, strlen(name), val);
746 
747 	zend_declare_class_constant_string(ce, "UNICODE_VERSION", sizeof("UNICODE_VERISON")-1, U_UNICODE_VERSION);
748 	IC_CONSTL("CODEPOINT_MIN", UCHAR_MIN_VALUE)
749 	IC_CONSTL("CODEPOINT_MAX", UCHAR_MAX_VALUE)
750 	zend_declare_class_constant_double(ce, "NO_NUMERIC_VALUE", sizeof("NO_NUMERIC_VALUE")-1, U_NO_NUMERIC_VALUE);
751 
752 	/* All enums used by the uchar APIs.  There are a LOT of them,
753 	 * so they're separated out into include files,
754 	 * leaving this source file for actual implementation.
755 	 */
756 #define UPROPERTY(name) IC_CONSTL("PROPERTY_" #name, UCHAR_##name)
757 #include "uproperty-enum.h"
758 #undef UPROPERTY
759 
760 #define UCHARCATEGORY(name) IC_CONSTL("CHAR_CATEGORY_" #name, U_##name)
761 #include "ucharcategory-enum.h"
762 #undef UCHARCATEGORY
763 
764 #define UCHARDIRECTION(name) IC_CONSTL("CHAR_DIRECTION_" #name, U_##name)
765 #include "uchardirection-enum.h"
766 #undef UCHARDIRECTION
767 
768 #define UBLOCKCODE(name) IC_CONSTL("BLOCK_CODE_" #name, UBLOCK_##name)
769 #include "ublockcode-enum.h"
770 #undef UBLOCKCODE
771 
772 	/* Smaller, self-destribing enums */
773 #define UOTHER(name) IC_CONSTL(#name, U_##name)
774 #include "uother-enum.h"
775 #undef UOTHER
776 
777 #undef IC_CONSTL
778 #undef IC_CONSTS
779 
780 	return SUCCESS;
781 }
782