xref: /PHP-7.4/ext/intl/uchar/uchar.c (revision 1ad08256)
1 #include "uchar.h"
2 #include "intl_data.h"
3 #include "intl_convert.h"
4 
5 #include <unicode/uchar.h>
6 #include <unicode/utf8.h>
7 
/* Shorthand for defining a method of the IntlChar class via PHP_METHOD. */
#define IC_METHOD(mname) PHP_METHOD(IntlChar, mname)
9 
/* Converts a user-supplied zval into a single Unicode codepoint.
 *
 * Accepted inputs:
 *  - IS_LONG:   the integer is taken as the codepoint itself.
 *  - IS_STRING: the string must decode (via U8_NEXT) to exactly one codepoint,
 *               i.e. decoding must consume the whole string.
 * Any other type is rejected.
 *
 * On success the codepoint is stored in *pcp and SUCCESS is returned.
 * On failure the global intl error is set to U_ILLEGAL_ARGUMENT_ERROR with a
 * descriptive message, *pcp is left untouched and FAILURE is returned.
 */
static inline int convert_cp(UChar32* pcp, zval *zcp) {
	zend_long codepoint = -1;

	switch (Z_TYPE_P(zcp)) {
		case IS_LONG:
			codepoint = Z_LVAL_P(zcp);
			break;

		case IS_STRING: {
			int32_t consumed = 0;
			size_t input_len = Z_STRLEN_P(zcp);

			/* U8_NEXT works with int32_t offsets, so refuse anything larger. */
			if (ZEND_SIZE_T_INT_OVFL(input_len)) {
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
				return FAILURE;
			}

			U8_NEXT(Z_STRVAL_P(zcp), consumed, input_len, codepoint);

			/* Decoding one codepoint must have used up the entire string. */
			if ((size_t)consumed != input_len) {
				intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
				intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
				return FAILURE;
			}
			break;
		}

		default:
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point.  Must be either integer or UTF-8 sequence.", 0);
			return FAILURE;
	}

	/* Reject values outside [UCHAR_MIN_VALUE, UCHAR_MAX_VALUE]. */
	if (!((codepoint >= UCHAR_MIN_VALUE) && (codepoint <= UCHAR_MAX_VALUE))) {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
		return FAILURE;
	}

	*pcp = (UChar32)codepoint;
	return SUCCESS;
}
44 
45 /* {{{ proto string IntlChar::chr(int|string $codepoint)
46  * Converts a numeric codepoint to UTF-8
47  * Acts as an identify function when given a valid UTF-8 encoded codepoint
48  */
49 ZEND_BEGIN_ARG_INFO_EX(chr_arginfo, 0, ZEND_RETURN_VALUE, 1)
50 	ZEND_ARG_INFO(0, codepoint)
51 ZEND_END_ARG_INFO();
IC_METHOD(chr)52 IC_METHOD(chr) {
53 	UChar32 cp;
54 	zval *zcp;
55 	char buffer[5];
56 	int buffer_len = 0;
57 
58 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
59 	    (convert_cp(&cp, zcp) == FAILURE)) {
60 		return;
61 	}
62 
63 	/* We can use unsafe because we know the codepoint is in valid range
64 	 * and that 4 bytes is enough for any unicode point
65 	 */
66 	U8_APPEND_UNSAFE(buffer, buffer_len, cp);
67 	buffer[buffer_len] = 0;
68 	RETURN_STRINGL(buffer, buffer_len);
69 }
70 /* }}} */
71 
72 /* {{{ proto int IntlChar::ord(int|string $character)
73  * Converts a UTf-8 encoded codepoint to its integer U32 value
74  * Acts as an identity function when passed a valid integer codepoint
75  */
76 ZEND_BEGIN_ARG_INFO_EX(ord_arginfo, 0, ZEND_RETURN_VALUE, 1)
77 	ZEND_ARG_INFO(0, character)
78 ZEND_END_ARG_INFO();
IC_METHOD(ord)79 IC_METHOD(ord) {
80 	UChar32 cp;
81 	zval *zcp;
82 
83 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
84 	    (convert_cp(&cp, zcp) == FAILURE)) {
85 		return;
86 	}
87 
88 	RETURN_LONG(cp);
89 }
90 /* }}} */
91 
92 /* {{{ proto bool IntlChar::hasBinaryProperty(int|string $codepoint, int $property) */
93 ZEND_BEGIN_ARG_INFO_EX(hasBinaryProperty_arginfo, 0, ZEND_RETURN_VALUE, 2)
94 	ZEND_ARG_INFO(0, codepoint)
95 	ZEND_ARG_INFO(0, property)
96 ZEND_END_ARG_INFO();
IC_METHOD(hasBinaryProperty)97 IC_METHOD(hasBinaryProperty) {
98 	UChar32 cp;
99 	zend_long prop;
100 	zval *zcp;
101 
102 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
103 	    (convert_cp(&cp, zcp) == FAILURE)) {
104 		return;
105 	}
106 
107 	RETURN_BOOL(u_hasBinaryProperty(cp, (UProperty)prop));
108 }
109 /* }}} */
110 
111 /* {{{ proto int IntlChar::getIntPropertyValue(int|string $codepoint, int $property) */
112 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyValue_arginfo, 0, ZEND_RETURN_VALUE, 2)
113 	ZEND_ARG_INFO(0, codepoint)
114 	ZEND_ARG_INFO(0, property)
115 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyValue)116 IC_METHOD(getIntPropertyValue) {
117 	UChar32 cp;
118 	zend_long prop;
119 	zval *zcp;
120 
121 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zl", &zcp, &prop) == FAILURE) ||
122 	    (convert_cp(&cp, zcp) == FAILURE)) {
123 		return;
124 	}
125 
126 	RETURN_LONG(u_getIntPropertyValue(cp, (UProperty)prop));
127 }
128 /* }}} */
129 
130 /* {{{ proto int IntlChar::getIntPropertyMinValue(int $property) */
131 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMinValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
132 	ZEND_ARG_INFO(0, property)
133 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyMinValue)134 IC_METHOD(getIntPropertyMinValue) {
135 	zend_long prop;
136 
137 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
138 		return;
139 	}
140 
141 	RETURN_LONG(u_getIntPropertyMinValue((UProperty)prop));
142 }
143 /* }}} */
144 
145 /* {{{ proto int IntlChar::getIntPropertyMaxValue(int $property) */
146 ZEND_BEGIN_ARG_INFO_EX(getIntPropertyMaxValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
147 	ZEND_ARG_INFO(0, property)
148 ZEND_END_ARG_INFO();
IC_METHOD(getIntPropertyMaxValue)149 IC_METHOD(getIntPropertyMaxValue) {
150 	zend_long prop;
151 
152 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &prop) == FAILURE) {
153 		return;
154 	}
155 
156 	RETURN_LONG(u_getIntPropertyMaxValue((UProperty)prop));
157 }
158 /* }}} */
159 
160 /* {{{ proto float IntlChar::getNumericValue(int|string $codepoint) */
161 ZEND_BEGIN_ARG_INFO_EX(getNumericValue_arginfo, 0, ZEND_RETURN_VALUE, 1)
162 	ZEND_ARG_INFO(0, codepoint)
163 ZEND_END_ARG_INFO();
IC_METHOD(getNumericValue)164 IC_METHOD(getNumericValue) {
165 	UChar32 cp;
166 	zval *zcp;
167 
168 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
169 	    (convert_cp(&cp, zcp) == FAILURE)) {
170 		return;
171 	}
172 
173 	RETURN_DOUBLE(u_getNumericValue(cp));
174 }
175 /* }}} */
176 
177 /* {{{ proto void IntlChar::enumCharTypes(callable $callback) */
178 ZEND_BEGIN_ARG_INFO_EX(enumCharTypes_arginfo, 0, ZEND_RETURN_VALUE, 0)
179 	ZEND_ARG_INFO(0, callback)
180 ZEND_END_ARG_INFO();
181 typedef struct _enumCharType_data {
182 	zend_fcall_info fci;
183 	zend_fcall_info_cache fci_cache;
184 } enumCharType_data;
enumCharType_callback(enumCharType_data * context,UChar32 start,UChar32 limit,UCharCategory type)185 static UBool enumCharType_callback(enumCharType_data *context,
186                                    UChar32 start, UChar32 limit,
187                                    UCharCategory type) {
188 	zval retval;
189 	zval args[3];
190 
191 	ZVAL_NULL(&retval);
192 	/* Note that $start is INclusive, while $limit is EXclusive
193 	 * Therefore (0, 32, 15) means CPs 0..31 are of type 15
194 	 */
195 	ZVAL_LONG(&args[0], start);
196 	ZVAL_LONG(&args[1], limit);
197 	ZVAL_LONG(&args[2], type);
198 
199 	context->fci.retval = &retval;
200 	context->fci.param_count = 3;
201 	context->fci.params = args;
202 
203 	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
204 		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
205 		intl_errors_set_custom_msg(NULL, "enumCharTypes callback failed", 0);
206 		zval_ptr_dtor(&retval);
207 		return 0;
208 	}
209 	zval_ptr_dtor(&retval);
210 	return 1;
211 }
IC_METHOD(enumCharTypes)212 IC_METHOD(enumCharTypes) {
213 	enumCharType_data context;
214 
215 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "f", &context.fci, &context.fci_cache) == FAILURE) {
216 		return;
217 	}
218 	u_enumCharTypes((UCharEnumTypeRange*)enumCharType_callback, &context);
219 }
220 /* }}} */
221 
222 /* {{{ proto int IntlChar::getBlockCode(int|string $codepoint) */
223 ZEND_BEGIN_ARG_INFO_EX(getBlockCode_arginfo, 0, ZEND_RETURN_VALUE, 1)
224 	ZEND_ARG_INFO(0, codepoint)
ZEND_END_ARG_INFO()225 ZEND_END_ARG_INFO()
226 IC_METHOD(getBlockCode) {
227 	UChar32 cp;
228 	zval *zcp;
229 
230 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
231 	    (convert_cp(&cp, zcp) == FAILURE)) {
232 		return;
233 	}
234 
235 	RETURN_LONG(ublock_getCode(cp));
236 }
237 /* }}} */
238 
239 /* {{{ proto string IntlChar::charName(int|string $codepoint, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
240 ZEND_BEGIN_ARG_INFO_EX(charName_arginfo, 0, ZEND_RETURN_VALUE, 1)
241 	ZEND_ARG_INFO(0, codepoint)
242 	ZEND_ARG_INFO(0, nameChoice)
ZEND_END_ARG_INFO()243 ZEND_END_ARG_INFO()
244 IC_METHOD(charName) {
245 	UChar32 cp;
246 	zval *zcp;
247 	UErrorCode error = U_ZERO_ERROR;
248 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
249 	zend_string *buffer = NULL;
250 	int32_t buffer_len;
251 
252 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &nameChoice) == FAILURE) ||
253 	    (convert_cp(&cp, zcp) == FAILURE)) {
254 		RETURN_NULL();
255 	}
256 
257 	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, NULL, 0, &error);
258 	buffer = zend_string_alloc(buffer_len, 0);
259 	error = U_ZERO_ERROR;
260 	buffer_len = u_charName(cp, (UCharNameChoice)nameChoice, ZSTR_VAL(buffer), ZSTR_LEN(buffer) + 1, &error);
261 	if (U_FAILURE(error)) {
262 		zend_string_efree(buffer);
263 		INTL_CHECK_STATUS_OR_NULL(error, "Failure getting character name");
264 	}
265 	RETURN_NEW_STR(buffer);
266 }
267 /* }}} */
268 
269 /* {{{ proto int IntlChar::charFromName(string $characterName, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
270 ZEND_BEGIN_ARG_INFO_EX(charFromName_arginfo, 0, ZEND_RETURN_VALUE, 1)
271 	ZEND_ARG_INFO(0, characterName)
272 	ZEND_ARG_INFO(0, nameChoice)
ZEND_END_ARG_INFO()273 ZEND_END_ARG_INFO()
274 IC_METHOD(charFromName) {
275 	char *name;
276 	size_t name_len;
277 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
278 	UChar32 ret;
279 	UErrorCode error = U_ZERO_ERROR;
280 
281 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|l", &name, &name_len, &nameChoice) == FAILURE) {
282 		RETURN_NULL();
283 	}
284 
285 	ret = u_charFromName((UCharNameChoice)nameChoice, name, &error);
286 	INTL_CHECK_STATUS_OR_NULL(error, NULL);
287 	RETURN_LONG(ret);
288 }
289 /* }}} */
290 
291 /* {{{ void void IntlChar::enumCharNames(int|string $start, int|string $limit, callable $callback, int $nameChoice = IntlChar::UNICODE_CHAR_NAME) */
292 ZEND_BEGIN_ARG_INFO_EX(enumCharNames_arginfo, 0, ZEND_RETURN_VALUE, 3)
293 	ZEND_ARG_INFO(0, start)
294 	ZEND_ARG_INFO(0, limit)
295 	ZEND_ARG_INFO(0, callback)
296 	ZEND_ARG_INFO(0, nameChoice)
297 ZEND_END_ARG_INFO();
298 typedef struct _enumCharNames_data {
299 	zend_fcall_info fci;
300 	zend_fcall_info_cache fci_cache;
301 } enumCharNames_data;
enumCharNames_callback(enumCharNames_data * context,UChar32 code,UCharNameChoice nameChoice,const char * name,int32_t length)302 static UBool enumCharNames_callback(enumCharNames_data *context,
303                                     UChar32 code, UCharNameChoice nameChoice,
304                                     const char *name, int32_t length) {
305 	zval retval;
306 	zval args[3];
307 
308 	ZVAL_NULL(&retval);
309 	ZVAL_LONG(&args[0], code);
310 	ZVAL_LONG(&args[1], nameChoice);
311 	ZVAL_STRINGL(&args[2], name, length);
312 
313 	context->fci.retval = &retval;
314 	context->fci.param_count = 3;
315 	context->fci.params = args;
316 
317 	if (zend_call_function(&context->fci, &context->fci_cache) == FAILURE) {
318 		intl_error_set_code(NULL, U_INTERNAL_PROGRAM_ERROR);
319 		intl_error_set_custom_msg(NULL, "enumCharNames callback failed", 0);
320 		zval_ptr_dtor(&retval);
321 		zval_ptr_dtor_str(&args[2]);
322 		return 0;
323 	}
324 	zval_ptr_dtor(&retval);
325 	zval_ptr_dtor_str(&args[2]);
326 	return 1;
327 }
IC_METHOD(enumCharNames)328 IC_METHOD(enumCharNames) {
329 	UChar32 start, limit;
330 	zval *zstart, *zlimit;
331 	enumCharNames_data context;
332 	zend_long nameChoice = U_UNICODE_CHAR_NAME;
333 	UErrorCode error = U_ZERO_ERROR;
334 
335 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "zzf|l", &zstart, &zlimit, &context.fci, &context.fci_cache, &nameChoice) == FAILURE) ||
336 	    (convert_cp(&start, zstart) == FAILURE) ||
337 	    (convert_cp(&limit, zlimit) == FAILURE)) {
338 		return;
339 	}
340 
341 	u_enumCharNames(start, limit, (UEnumCharNamesFn*)enumCharNames_callback, &context, nameChoice, &error);
342 	INTL_CHECK_STATUS(error, NULL);
343 }
344 /* }}} */
345 
346 /* {{{ proto string IntlChar::getPropertyName(int $property, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
347 ZEND_BEGIN_ARG_INFO_EX(getPropertyName_arginfo, 0, ZEND_RETURN_VALUE, 1)
348 	ZEND_ARG_INFO(0, property)
349 	ZEND_ARG_INFO(0, nameChoice)
350 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyName)351 IC_METHOD(getPropertyName) {
352 	zend_long property;
353 	zend_long nameChoice = U_LONG_PROPERTY_NAME;
354 	const char *ret;
355 
356 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &property, &nameChoice) == FAILURE) {
357 		return;
358 	}
359 
360 	ret = u_getPropertyName((UProperty)property, (UPropertyNameChoice)nameChoice);
361 	if (ret) {
362 		RETURN_STRING(ret);
363 	} else {
364 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
365 		intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
366 		RETURN_FALSE;
367 	}
368 }
369 /* }}} */
370 
371 /* {{{ proto int IntlChar::getPropertyEnum(string $alias) */
372 ZEND_BEGIN_ARG_INFO_EX(getPropertyEnum_arginfo, 0, ZEND_RETURN_VALUE, 1)
373 	ZEND_ARG_INFO(0, alias)
374 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyEnum)375 IC_METHOD(getPropertyEnum) {
376 	char *alias;
377 	size_t alias_len;
378 
379 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &alias, &alias_len) == FAILURE) {
380 		return;
381 	}
382 
383 	RETURN_LONG(u_getPropertyEnum(alias));
384 }
385 /* }}} */
386 
387 /* {{{ proto string IntlChar::getPropertyValueName(int $property, int $value[, int $nameChoice = IntlChar::LONG_PROPERTY_NAME) */
388 ZEND_BEGIN_ARG_INFO_EX(getPropertyValueName_arginfo, 0, ZEND_RETURN_VALUE, 2)
389 	ZEND_ARG_INFO(0, property)
390 	ZEND_ARG_INFO(0, value)
391 	ZEND_ARG_INFO(0, nameChoice)
392 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyValueName)393 IC_METHOD(getPropertyValueName) {
394 	zend_long property, value, nameChoice = U_LONG_PROPERTY_NAME;
395 	const char *ret;
396 
397 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ll|l", &property, &value, &nameChoice) == FAILURE) {
398 		return;
399 	}
400 
401 	ret = u_getPropertyValueName((UProperty)property, value, (UPropertyNameChoice)nameChoice);
402 	if (ret) {
403 		RETURN_STRING(ret);
404 	} else {
405 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
406 		intl_error_set_custom_msg(NULL, "Failed to get property name", 0);
407 		RETURN_FALSE;
408 	}
409 }
410 /* }}} */
411 
412 /* {{{ proto int IntlChar::getPropertyValueEnum(int $property, string $name) */
413 ZEND_BEGIN_ARG_INFO_EX(getPropertyValueEnum_arginfo, 0, ZEND_RETURN_VALUE, 2)
414 	ZEND_ARG_INFO(0, property)
415 	ZEND_ARG_INFO(0, name)
416 ZEND_END_ARG_INFO();
IC_METHOD(getPropertyValueEnum)417 IC_METHOD(getPropertyValueEnum) {
418 	zend_long property;
419 	char *name;
420 	size_t name_len;
421 
422 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ls", &property, &name, &name_len) == FAILURE) {
423 		return;
424 	}
425 
426 	RETURN_LONG(u_getPropertyValueEnum((UProperty)property, name));
427 }
428 /* }}} */
429 
430 /* {{{ proto int|string IntlChar::foldCase(int|string $codepoint, int $options = IntlChar::FOLD_CASE_DEFAULT) */
431 ZEND_BEGIN_ARG_INFO_EX(foldCase_arginfo, 0, ZEND_RETURN_VALUE, 1)
432 	ZEND_ARG_INFO(0, codepoint)
433 	ZEND_ARG_INFO(0, options)
434 ZEND_END_ARG_INFO();
IC_METHOD(foldCase)435 IC_METHOD(foldCase) {
436 	UChar32 cp, ret;
437 	zval *zcp;
438 	zend_long options = U_FOLD_CASE_DEFAULT;
439 
440 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &options) == FAILURE) ||
441 	    (convert_cp(&cp, zcp) == FAILURE)) {
442 		return;
443 	}
444 
445 	ret = u_foldCase(cp, options);
446 	if (Z_TYPE_P(zcp) == IS_STRING) {
447 		char buffer[5];
448 		int buffer_len = 0;
449 		U8_APPEND_UNSAFE(buffer, buffer_len, ret);
450 		buffer[buffer_len] = 0;
451 		RETURN_STRINGL(buffer, buffer_len);
452 	} else {
453 		RETURN_LONG(ret);
454 	}
455 }
456 /* }}} */
457 
458 /* {{{ proto int IntlChar::digit(int|string $codepoint[, int $radix = 10]) */
459 ZEND_BEGIN_ARG_INFO_EX(digit_arginfo, 0, ZEND_RETURN_VALUE, 1)
460 	ZEND_ARG_INFO(0, codepoint)
461 	ZEND_ARG_INFO(0, radix)
462 ZEND_END_ARG_INFO();
IC_METHOD(digit)463 IC_METHOD(digit) {
464 	UChar32 cp;
465 	zval *zcp;
466 	zend_long radix = 10;
467 	int ret;
468 
469 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z|l", &zcp, &radix) == FAILURE) ||
470 	    (convert_cp(&cp, zcp) == FAILURE)) {
471 		return;
472 	}
473 
474 	ret = u_digit(cp, radix);
475 	if (ret < 0) {
476 		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
477 		intl_error_set_custom_msg(NULL, "Invalid digit", 0);
478 		RETURN_FALSE;
479 	}
480 	RETURN_LONG(ret);
481 }
482 /* }}} */
483 
484 /* {{{ proto int IntlChar::forDigit(int $digit[, int $radix = 10]) */
485 ZEND_BEGIN_ARG_INFO_EX(forDigit_arginfo, 0, ZEND_RETURN_VALUE, 1)
486 	ZEND_ARG_INFO(0, digit)
487 	ZEND_ARG_INFO(0, radix)
488 ZEND_END_ARG_INFO();
IC_METHOD(forDigit)489 IC_METHOD(forDigit) {
490 	zend_long digit, radix = 10;
491 
492 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l|l", &digit, &radix) == FAILURE) {
493 		return;
494 	}
495 
496 	RETURN_LONG(u_forDigit(digit, radix));
497 }
498 /* }}} */
499 
500 /* {{{ proto array IntlChar::charAge(int|string $codepoint) */
501 ZEND_BEGIN_ARG_INFO_EX(charAge_arginfo, 0, ZEND_RETURN_VALUE, 1)
502 	ZEND_ARG_INFO(0, codepoint)
503 ZEND_END_ARG_INFO();
IC_METHOD(charAge)504 IC_METHOD(charAge) {
505 	UChar32 cp;
506 	zval *zcp;
507 	UVersionInfo version;
508 	int i;
509 
510 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
511 	    (convert_cp(&cp, zcp) == FAILURE)) {
512 		return;
513 	}
514 
515 	u_charAge(cp, version);
516 	array_init(return_value);
517 	for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
518 		add_next_index_long(return_value, version[i]);
519 	}
520 }
521 /* }}} */
522 
523 /* {{{ proto array IntlChar::getUnicodeVersion() */
524 ZEND_BEGIN_ARG_INFO_EX(getUnicodeVersion_arginfo, 0, ZEND_RETURN_VALUE, 0)
525 ZEND_END_ARG_INFO();
IC_METHOD(getUnicodeVersion)526 IC_METHOD(getUnicodeVersion) {
527 	UVersionInfo version;
528 	int i;
529 
530 
531 	u_getUnicodeVersion(version);
532 	array_init(return_value);
533 	for(i = 0; i < U_MAX_VERSION_LENGTH; ++i) {
534 		add_next_index_long(return_value, version[i]);
535 	}
536 }
537 /* }}} */
538 
539 /* {{{ proto string IntlChar::getFC_NFKC_Closure(int|string $codepoint) */
540 ZEND_BEGIN_ARG_INFO_EX(getFC_NFKC_Closure_arginfo, 0, ZEND_RETURN_VALUE, 1)
541 	ZEND_ARG_INFO(0, codepoint)
542 ZEND_END_ARG_INFO();
IC_METHOD(getFC_NFKC_Closure)543 IC_METHOD(getFC_NFKC_Closure) {
544 	UChar32 cp;
545 	zval *zcp;
546 	UChar *closure;
547 	zend_string *u8str;
548 	int32_t closure_len;
549 	UErrorCode error = U_ZERO_ERROR;
550 
551 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) ||
552 	    (convert_cp(&cp, zcp) == FAILURE)) {
553 		return;
554 	}
555 
556 	closure_len = u_getFC_NFKC_Closure(cp, NULL, 0, &error);
557 	if (closure_len == 0) {
558 		RETURN_EMPTY_STRING();
559 	}
560 	closure = safe_emalloc(sizeof(UChar), closure_len + 1, 0);
561 	error = U_ZERO_ERROR;
562 	closure_len = u_getFC_NFKC_Closure(cp, closure, closure_len, &error);
563 	if (U_FAILURE(error)) {
564 		efree(closure);
565 		INTL_CHECK_STATUS(error, "Failed getting closure");
566 	}
567 
568 	error = U_ZERO_ERROR;
569 	u8str = intl_convert_utf16_to_utf8(closure, closure_len, &error);
570 	INTL_CHECK_STATUS(error, "Failed converting output to UTF8");
571 	efree(closure);
572 	RETVAL_NEW_STR(u8str);
573 }
574 /* }}} */
575 
576 /* {{{ proto bool IntlChar::<name>(int|string $codepoint) */
577 #define IC_BOOL_METHOD_CHAR(name) \
578 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
579 	ZEND_ARG_INFO(0, codepoint) \
580 ZEND_END_ARG_INFO(); \
581 IC_METHOD(name) { \
582 	UChar32 cp; zval *zcp; \
583 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
584 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
585 	RETURN_BOOL(u_##name(cp)); \
586 }
587 IC_BOOL_METHOD_CHAR(isUAlphabetic)
588 IC_BOOL_METHOD_CHAR(isULowercase)
589 IC_BOOL_METHOD_CHAR(isUUppercase)
590 IC_BOOL_METHOD_CHAR(isUWhiteSpace)
591 IC_BOOL_METHOD_CHAR(islower)
592 IC_BOOL_METHOD_CHAR(isupper)
593 IC_BOOL_METHOD_CHAR(istitle)
594 IC_BOOL_METHOD_CHAR(isdigit)
595 IC_BOOL_METHOD_CHAR(isalpha)
596 IC_BOOL_METHOD_CHAR(isalnum)
597 IC_BOOL_METHOD_CHAR(isxdigit)
598 IC_BOOL_METHOD_CHAR(ispunct)
599 IC_BOOL_METHOD_CHAR(isgraph)
600 IC_BOOL_METHOD_CHAR(isblank)
601 IC_BOOL_METHOD_CHAR(isdefined)
602 IC_BOOL_METHOD_CHAR(isspace)
603 IC_BOOL_METHOD_CHAR(isJavaSpaceChar)
604 IC_BOOL_METHOD_CHAR(isWhitespace)
605 IC_BOOL_METHOD_CHAR(iscntrl)
606 IC_BOOL_METHOD_CHAR(isISOControl)
607 IC_BOOL_METHOD_CHAR(isprint)
608 IC_BOOL_METHOD_CHAR(isbase)
609 IC_BOOL_METHOD_CHAR(isMirrored)
610 IC_BOOL_METHOD_CHAR(isIDStart)
611 IC_BOOL_METHOD_CHAR(isIDPart)
612 IC_BOOL_METHOD_CHAR(isIDIgnorable)
613 IC_BOOL_METHOD_CHAR(isJavaIDStart)
614 IC_BOOL_METHOD_CHAR(isJavaIDPart)
615 #undef IC_BOOL_METHOD_CHAR
616 /* }}} */
617 
618 /* {{{ proto int IntlChar::<name>(int|string $codepoint) */
619 #define IC_INT_METHOD_CHAR(name) \
620 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
621 	ZEND_ARG_INFO(0, codepoint) \
622 ZEND_END_ARG_INFO(); \
623 IC_METHOD(name) { \
624 	UChar32 cp; zval *zcp; \
625 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
626 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
627 	RETURN_LONG(u_##name(cp)); \
628 }
629 IC_INT_METHOD_CHAR(charDirection)
630 IC_INT_METHOD_CHAR(charType)
631 IC_INT_METHOD_CHAR(getCombiningClass)
632 IC_INT_METHOD_CHAR(charDigitValue)
633 #undef IC_INT_METHOD_CHAR
634 /* }}} */
635 
636 /* {{{ proto int|string IntlChar::<name>(int|string $codepoint)
637  * Returns a utf-8 character if codepoint was passed as a utf-8 sequence
638  * Returns an int otherwise
639  */
640 #define IC_CHAR_METHOD_CHAR(name) \
641 ZEND_BEGIN_ARG_INFO_EX(name##_arginfo, 0, ZEND_RETURN_VALUE, 1) \
642 	ZEND_ARG_INFO(0, codepoint) \
643 ZEND_END_ARG_INFO(); \
644 IC_METHOD(name) { \
645 	UChar32 cp, ret; zval *zcp; \
646 	if ((zend_parse_parameters(ZEND_NUM_ARGS(), "z", &zcp) == FAILURE) || \
647 	    (convert_cp(&cp, zcp) == FAILURE)) { return; } \
648 	ret = u_##name(cp); \
649 	if (Z_TYPE_P(zcp) == IS_STRING) { \
650 		char buffer[5]; \
651 		int buffer_len = 0; \
652 		U8_APPEND_UNSAFE(buffer, buffer_len, ret); \
653 		buffer[buffer_len] = 0; \
654 		RETURN_STRINGL(buffer, buffer_len); \
655 	} else { \
656 		RETURN_LONG(ret); \
657 	} \
658 }
659 IC_CHAR_METHOD_CHAR(charMirror)
660 IC_CHAR_METHOD_CHAR(tolower)
661 IC_CHAR_METHOD_CHAR(toupper)
662 IC_CHAR_METHOD_CHAR(totitle)
663 #if U_ICU_VERSION_MAJOR_NUM >= 52
664 IC_CHAR_METHOD_CHAR(getBidiPairedBracket)
665 #endif /* ICU >= 52 */
666 #undef IC_CHAR_METHOD_CHAR
667 /* }}} */
668 
/* Method table for the IntlChar class. Every entry is registered as a
 * public static method and paired with the <name>_arginfo defined next to
 * its implementation above.
 */
static const zend_function_entry intlchar_methods[] = {
/* Local shorthand: one public static method entry per name. */
#define IC_ME(mname) PHP_ME(IntlChar, mname, mname##_arginfo, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC)
	IC_ME(chr)
	IC_ME(ord)
	IC_ME(hasBinaryProperty)
	IC_ME(isUAlphabetic)
	IC_ME(isULowercase)
	IC_ME(isUUppercase)
	IC_ME(isUWhiteSpace)
	IC_ME(getIntPropertyValue)
	IC_ME(getIntPropertyMinValue)
	IC_ME(getIntPropertyMaxValue)
	IC_ME(getNumericValue)
	IC_ME(islower)
	IC_ME(isupper)
	IC_ME(istitle)
	IC_ME(isdigit)
	IC_ME(isalpha)
	IC_ME(isalnum)
	IC_ME(isxdigit)
	IC_ME(ispunct)
	IC_ME(isgraph)
	IC_ME(isblank)
	IC_ME(isdefined)
	IC_ME(isspace)
	IC_ME(isJavaSpaceChar)
	IC_ME(isWhitespace)
	IC_ME(iscntrl)
	IC_ME(isISOControl)
	IC_ME(isprint)
	IC_ME(isbase)
	IC_ME(charDirection)
	IC_ME(isMirrored)
	IC_ME(charMirror)
/* Only registered when the method itself was compiled (same guard as above). */
#if U_ICU_VERSION_MAJOR_NUM >= 52
	IC_ME(getBidiPairedBracket)
#endif /* ICU >= 52 */
	IC_ME(charType)
	IC_ME(enumCharTypes)
	IC_ME(getCombiningClass)
	IC_ME(charDigitValue)
	IC_ME(getBlockCode)
	IC_ME(charName)
	IC_ME(charFromName)
	IC_ME(enumCharNames)
	IC_ME(getPropertyName)
	IC_ME(getPropertyEnum)
	IC_ME(getPropertyValueName)
	IC_ME(getPropertyValueEnum)
	IC_ME(isIDStart)
	IC_ME(isIDPart)
	IC_ME(isIDIgnorable)
	IC_ME(isJavaIDStart)
	IC_ME(isJavaIDPart)
	IC_ME(tolower)
	IC_ME(toupper)
	IC_ME(totitle)
	IC_ME(foldCase)
	IC_ME(digit)
	IC_ME(forDigit)
	IC_ME(charAge)
	IC_ME(getUnicodeVersion)
	IC_ME(getFC_NFKC_Closure)
#undef IC_ME
	PHP_FE_END
};
735 
php_uchar_minit(INIT_FUNC_ARGS)736 int php_uchar_minit(INIT_FUNC_ARGS) {
737 	zend_class_entry tmp, *ce;
738 
739 	INIT_CLASS_ENTRY(tmp, "IntlChar", intlchar_methods);
740 	ce = zend_register_internal_class(&tmp);
741 
742 #define IC_CONSTL(name, val) \
743 	zend_declare_class_constant_long(ce, name, strlen(name), val);
744 
745 	zend_declare_class_constant_string(ce, "UNICODE_VERSION", sizeof("UNICODE_VERISON")-1, U_UNICODE_VERSION);
746 	IC_CONSTL("CODEPOINT_MIN", UCHAR_MIN_VALUE)
747 	IC_CONSTL("CODEPOINT_MAX", UCHAR_MAX_VALUE)
748 	zend_declare_class_constant_double(ce, "NO_NUMERIC_VALUE", sizeof("NO_NUMERIC_VALUE")-1, U_NO_NUMERIC_VALUE);
749 
750 	/* All enums used by the uchar APIs.  There are a LOT of them,
751 	 * so they're separated out into include files,
752 	 * leaving this source file for actual implementation.
753 	 */
754 #define UPROPERTY(name) IC_CONSTL("PROPERTY_" #name, UCHAR_##name)
755 #include "uproperty-enum.h"
756 #undef UPROPERTY
757 
758 #define UCHARCATEGORY(name) IC_CONSTL("CHAR_CATEGORY_" #name, U_##name)
759 #include "ucharcategory-enum.h"
760 #undef UCHARCATEGORY
761 
762 #define UCHARDIRECTION(name) IC_CONSTL("CHAR_DIRECTION_" #name, U_##name)
763 #include "uchardirection-enum.h"
764 #undef UCHARDIRECTION
765 
766 #define UBLOCKCODE(name) IC_CONSTL("BLOCK_CODE_" #name, UBLOCK_##name)
767 #include "ublockcode-enum.h"
768 #undef UBLOCKCODE
769 
770 	/* Smaller, self-destribing enums */
771 #define UOTHER(name) IC_CONSTL(#name, U_##name)
772 #include "uother-enum.h"
773 #undef UOTHER
774 
775 #undef IC_CONSTL
776 #undef IC_CONSTS
777 
778 	return SUCCESS;
779 }
780