1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <unicode/brkiter.h>
22 #include "codepointiterator_internal.h"
23 
24 #include "breakiterator_iterators.h"
25 
26 extern "C" {
27 #include "../php_intl.h"
28 #define USE_BREAKITERATOR_POINTER 1
29 #include "breakiterator_class.h"
30 #include "../locale/locale.h"
31 #include <zend_exceptions.h>
32 }
33 
34 using PHP::CodePointBreakIterator;
35 
PHP_METHOD(BreakIterator,__construct)36 U_CFUNC PHP_METHOD(BreakIterator, __construct)
37 {
38 	zend_throw_exception( NULL,
39 		"An object of this type cannot be created with the new operator",
40 		0 TSRMLS_CC );
41 }
42 
_breakiter_factory(const char * func_name,BreakIterator * (* func)(const Locale &,UErrorCode &),INTERNAL_FUNCTION_PARAMETERS)43 static void _breakiter_factory(const char *func_name,
44 							   BreakIterator *(*func)(const Locale&, UErrorCode&),
45 							   INTERNAL_FUNCTION_PARAMETERS)
46 {
47 	BreakIterator	*biter;
48 	const char		*locale_str = NULL;
49 	int				dummy;
50 	char			*msg;
51 	UErrorCode		status = UErrorCode();
52 	intl_error_reset(NULL TSRMLS_CC);
53 
54 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s!",
55 			&locale_str, &dummy) == FAILURE) {
56 		spprintf(&msg, 0, "%s: bad arguments", func_name);
57 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC);
58 		efree(msg);
59 		RETURN_NULL();
60 	}
61 
62 	if (locale_str == NULL) {
63 		locale_str = intl_locale_get_default(TSRMLS_C);
64 	}
65 
66 	biter = func(Locale::createFromName(locale_str), status);
67 	intl_error_set_code(NULL, status TSRMLS_CC);
68 	if (U_FAILURE(status)) {
69 		spprintf(&msg, 0, "%s: error creating BreakIterator",
70 				func_name);
71 		intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC);
72 		efree(msg);
73 		RETURN_NULL();
74 	}
75 
76 	breakiterator_object_create(return_value, biter TSRMLS_CC);
77 }
78 
PHP_FUNCTION(breakiter_create_word_instance)79 U_CFUNC PHP_FUNCTION(breakiter_create_word_instance)
80 {
81 	_breakiter_factory("breakiter_create_word_instance",
82 			&BreakIterator::createWordInstance,
83 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
84 }
85 
PHP_FUNCTION(breakiter_create_line_instance)86 U_CFUNC PHP_FUNCTION(breakiter_create_line_instance)
87 {
88 	_breakiter_factory("breakiter_create_line_instance",
89 			&BreakIterator::createLineInstance,
90 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
91 }
92 
PHP_FUNCTION(breakiter_create_character_instance)93 U_CFUNC PHP_FUNCTION(breakiter_create_character_instance)
94 {
95 	_breakiter_factory("breakiter_create_character_instance",
96 			&BreakIterator::createCharacterInstance,
97 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
98 }
99 
PHP_FUNCTION(breakiter_create_sentence_instance)100 U_CFUNC PHP_FUNCTION(breakiter_create_sentence_instance)
101 {
102 	_breakiter_factory("breakiter_create_sentence_instance",
103 			&BreakIterator::createSentenceInstance,
104 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
105 }
106 
PHP_FUNCTION(breakiter_create_title_instance)107 U_CFUNC PHP_FUNCTION(breakiter_create_title_instance)
108 {
109 	_breakiter_factory("breakiter_create_title_instance",
110 			&BreakIterator::createTitleInstance,
111 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
112 }
113 
PHP_FUNCTION(breakiter_create_code_point_instance)114 U_CFUNC PHP_FUNCTION(breakiter_create_code_point_instance)
115 {
116 	UErrorCode status = UErrorCode();
117 	intl_error_reset(NULL TSRMLS_CC);
118 
119 	if (zend_parse_parameters_none() == FAILURE) {
120 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
121 			"breakiter_create_code_point_instance: bad arguments", 0 TSRMLS_CC);
122 		RETURN_NULL();
123 	}
124 
125 	CodePointBreakIterator *cpbi = new CodePointBreakIterator();
126 	breakiterator_object_create(return_value, cpbi TSRMLS_CC);
127 }
128 
PHP_FUNCTION(breakiter_get_text)129 U_CFUNC PHP_FUNCTION(breakiter_get_text)
130 {
131 	BREAKITER_METHOD_INIT_VARS;
132 	object = getThis();
133 
134 	if (zend_parse_parameters_none() == FAILURE) {
135 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
136 			"breakiter_get_text: bad arguments", 0 TSRMLS_CC);
137 		RETURN_FALSE;
138 	}
139 
140 	BREAKITER_METHOD_FETCH_OBJECT;
141 
142 	if (bio->text == NULL) {
143 		RETURN_NULL();
144 	} else {
145 		RETURN_ZVAL(bio->text, 1, 0);
146 	}
147 }
148 
PHP_FUNCTION(breakiter_set_text)149 U_CFUNC PHP_FUNCTION(breakiter_set_text)
150 {
151 	char	*text;
152 	int		text_len;
153 	UText	*ut = NULL;
154 	zval	**textzv;
155 	BREAKITER_METHOD_INIT_VARS;
156 	object = getThis();
157 
158 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s",
159 			&text, &text_len) == FAILURE) {
160 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
161 			"breakiter_set_text: bad arguments", 0 TSRMLS_CC);
162 		RETURN_FALSE;
163 	}
164 
165 	int res = zend_get_parameters_ex(1, &textzv);
166 	assert(res == SUCCESS);
167 
168 	BREAKITER_METHOD_FETCH_OBJECT;
169 
170 	/* assert it's safe to use text and text_len because zpp changes the
171 	 * arguments in the stack */
172 	assert(text == Z_STRVAL_PP(textzv));
173 
174 	ut = utext_openUTF8(ut, text, text_len, BREAKITER_ERROR_CODE_P(bio));
175 	INTL_CTOR_CHECK_STATUS(bio, "breakiter_set_text: error opening UText");
176 
177 	bio->biter->setText(ut, BREAKITER_ERROR_CODE(bio));
178 	utext_close(ut); /* ICU shallow clones the UText */
179 	INTL_CTOR_CHECK_STATUS(bio, "breakiter_set_text: error calling "
180 		"BreakIterator::setText()");
181 
182 	/* When ICU clones the UText, it does not copy the buffer, so we have to
183 	 * keep the string buffer around by holding a reference to its zval. This
184 	 * also allows a faste implementation of getText() */
185 	if (bio->text != NULL) {
186 		zval_ptr_dtor(&bio->text);
187 	}
188 	bio->text = *textzv;
189 	zval_add_ref(&bio->text);
190 
191 	RETURN_TRUE;
192 }
193 
_breakiter_no_args_ret_int32(const char * func_name,int32_t (BreakIterator::* func)(),INTERNAL_FUNCTION_PARAMETERS)194 static void _breakiter_no_args_ret_int32(
195 		const char *func_name,
196 		int32_t (BreakIterator::*func)(),
197 		INTERNAL_FUNCTION_PARAMETERS)
198 {
199 	char	*msg;
200 	BREAKITER_METHOD_INIT_VARS;
201 	object = getThis();
202 
203 	if (zend_parse_parameters_none() == FAILURE) {
204 		spprintf(&msg, 0, "%s: bad arguments", func_name);
205 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC);
206 		efree(msg);
207 		RETURN_FALSE;
208 	}
209 
210 	BREAKITER_METHOD_FETCH_OBJECT;
211 
212 	int32_t res = (bio->biter->*func)();
213 
214 	RETURN_LONG((long)res);
215 }
216 
_breakiter_int32_ret_int32(const char * func_name,int32_t (BreakIterator::* func)(int32_t),INTERNAL_FUNCTION_PARAMETERS)217 static void _breakiter_int32_ret_int32(
218 		const char *func_name,
219 		int32_t (BreakIterator::*func)(int32_t),
220 		INTERNAL_FUNCTION_PARAMETERS)
221 {
222 	char	*msg;
223 	long	arg;
224 	BREAKITER_METHOD_INIT_VARS;
225 	object = getThis();
226 
227 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &arg) == FAILURE) {
228 		spprintf(&msg, 0, "%s: bad arguments", func_name);
229 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC);
230 		efree(msg);
231 		RETURN_FALSE;
232 	}
233 
234 	BREAKITER_METHOD_FETCH_OBJECT;
235 
236 	if (arg < INT32_MIN || arg > INT32_MAX) {
237 		spprintf(&msg, 0, "%s: offset argument is outside bounds of "
238 				"a 32-bit wide integer", func_name);
239 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC);
240 		efree(msg);
241 		RETURN_FALSE;
242 	}
243 
244 	int32_t res = (bio->biter->*func)((int32_t)arg);
245 
246 	RETURN_LONG((long)res);
247 }
248 
PHP_FUNCTION(breakiter_first)249 U_CFUNC PHP_FUNCTION(breakiter_first)
250 {
251 	_breakiter_no_args_ret_int32("breakiter_first",
252 			&BreakIterator::first,
253 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
254 }
255 
PHP_FUNCTION(breakiter_last)256 U_CFUNC PHP_FUNCTION(breakiter_last)
257 {
258 	_breakiter_no_args_ret_int32("breakiter_last",
259 			&BreakIterator::last,
260 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
261 }
262 
PHP_FUNCTION(breakiter_previous)263 U_CFUNC PHP_FUNCTION(breakiter_previous)
264 {
265 	_breakiter_no_args_ret_int32("breakiter_previous",
266 			&BreakIterator::previous,
267 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
268 }
269 
PHP_FUNCTION(breakiter_next)270 U_CFUNC PHP_FUNCTION(breakiter_next)
271 {
272 	bool no_arg_version = false;
273 
274 	if (ZEND_NUM_ARGS() == 0) {
275 		no_arg_version = true;
276 	} else if (ZEND_NUM_ARGS() == 1) {
277 		zval **arg;
278 		int res = zend_get_parameters_ex(1, &arg);
279 		assert(res == SUCCESS);
280 		if (Z_TYPE_PP(arg) == IS_NULL) {
281 			no_arg_version = true;
282 			ht = 0; /* pretend we don't have any argument */
283 		} else {
284 			no_arg_version = false;
285 		}
286 	}
287 
288 	if (no_arg_version) {
289 		_breakiter_no_args_ret_int32("breakiter_next",
290 				&BreakIterator::next,
291 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
292 	} else {
293 		_breakiter_int32_ret_int32("breakiter_next",
294 				&BreakIterator::next,
295 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
296 	}
297 }
298 
PHP_FUNCTION(breakiter_current)299 U_CFUNC PHP_FUNCTION(breakiter_current)
300 {
301 	BREAKITER_METHOD_INIT_VARS;
302 	object = getThis();
303 
304 	if (zend_parse_parameters_none() == FAILURE) {
305 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
306 				"breakiter_current: bad arguments", 0 TSRMLS_CC);
307 		RETURN_FALSE;
308 	}
309 
310 	BREAKITER_METHOD_FETCH_OBJECT;
311 
312 	int32_t res = bio->biter->current();
313 
314 	RETURN_LONG((long)res);
315 }
316 
PHP_FUNCTION(breakiter_following)317 U_CFUNC PHP_FUNCTION(breakiter_following)
318 {
319 	_breakiter_int32_ret_int32("breakiter_following",
320 			&BreakIterator::following,
321 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
322 }
323 
PHP_FUNCTION(breakiter_preceding)324 U_CFUNC PHP_FUNCTION(breakiter_preceding)
325 {
326 	_breakiter_int32_ret_int32("breakiter_preceding",
327 			&BreakIterator::preceding,
328 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
329 }
330 
PHP_FUNCTION(breakiter_is_boundary)331 U_CFUNC PHP_FUNCTION(breakiter_is_boundary)
332 {
333 	long offset;
334 	BREAKITER_METHOD_INIT_VARS;
335 	object = getThis();
336 
337 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l",
338 			&offset) == FAILURE) {
339 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
340 				"breakiter_is_boundary: bad arguments", 0 TSRMLS_CC);
341 		RETURN_FALSE;
342 	}
343 
344 	if (offset < INT32_MIN || offset > INT32_MAX) {
345 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
346 				"breakiter_is_boundary: offset argument is outside bounds of "
347 				"a 32-bit wide integer", 0 TSRMLS_CC);
348 		RETURN_FALSE;
349 	}
350 
351 	BREAKITER_METHOD_FETCH_OBJECT;
352 
353 	UBool res = bio->biter->isBoundary((int32_t)offset);
354 
355 	RETURN_BOOL((long)res);
356 }
357 
PHP_FUNCTION(breakiter_get_locale)358 U_CFUNC PHP_FUNCTION(breakiter_get_locale)
359 {
360 	long	locale_type;
361 	BREAKITER_METHOD_INIT_VARS;
362 	object = getThis();
363 
364 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &locale_type) == FAILURE) {
365 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
366 			"breakiter_get_locale: bad arguments", 0 TSRMLS_CC);
367 		RETURN_FALSE;
368 	}
369 
370 	if (locale_type != ULOC_ACTUAL_LOCALE && locale_type != ULOC_VALID_LOCALE) {
371 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
372 			"breakiter_get_locale: invalid locale type", 0 TSRMLS_CC);
373 		RETURN_FALSE;
374 	}
375 
376 	BREAKITER_METHOD_FETCH_OBJECT;
377 
378 	Locale locale = bio->biter->getLocale((ULocDataLocaleType)locale_type,
379 		BREAKITER_ERROR_CODE(bio));
380 	INTL_METHOD_CHECK_STATUS(bio,
381 		"breakiter_get_locale: Call to ICU method has failed");
382 
383 	RETURN_STRING(locale.getName(), 1);
384 }
385 
PHP_FUNCTION(breakiter_get_parts_iterator)386 U_CFUNC PHP_FUNCTION(breakiter_get_parts_iterator)
387 {
388 	long key_type = 0;
389 	BREAKITER_METHOD_INIT_VARS;
390 	object = getThis();
391 
392 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|l", &key_type) == FAILURE) {
393 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
394 			"breakiter_get_parts_iterator: bad arguments", 0 TSRMLS_CC);
395 		RETURN_FALSE;
396 	}
397 
398 	if (key_type != PARTS_ITERATOR_KEY_SEQUENTIAL
399 			&& key_type != PARTS_ITERATOR_KEY_LEFT
400 			&& key_type != PARTS_ITERATOR_KEY_RIGHT) {
401 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
402 			"breakiter_get_parts_iterator: bad key type", 0 TSRMLS_CC);
403 		RETURN_FALSE;
404 	}
405 
406 	BREAKITER_METHOD_FETCH_OBJECT;
407 
408 	IntlIterator_from_BreakIterator_parts(
409 		object, return_value, (parts_iter_key_type)key_type TSRMLS_CC);
410 }
411 
PHP_FUNCTION(breakiter_get_error_code)412 U_CFUNC PHP_FUNCTION(breakiter_get_error_code)
413 {
414 	BREAKITER_METHOD_INIT_VARS;
415 	object = getThis();
416 
417 	if (zend_parse_parameters_none() == FAILURE) {
418 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
419 			"breakiter_get_error_code: bad arguments", 0 TSRMLS_CC);
420 		RETURN_FALSE;
421 	}
422 
423 	/* Fetch the object (without resetting its last error code ). */
424 	bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
425 	if (bio == NULL)
426 		RETURN_FALSE;
427 
428 	RETURN_LONG((long)BREAKITER_ERROR_CODE(bio));
429 }
430 
PHP_FUNCTION(breakiter_get_error_message)431 U_CFUNC PHP_FUNCTION(breakiter_get_error_message)
432 {
433 	const char* message = NULL;
434 	BREAKITER_METHOD_INIT_VARS;
435 	object = getThis();
436 
437 	if (zend_parse_parameters_none() == FAILURE) {
438 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
439 			"breakiter_get_error_message: bad arguments", 0 TSRMLS_CC );
440 		RETURN_FALSE;
441 	}
442 
443 
444 	/* Fetch the object (without resetting its last error code ). */
445 	bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
446 	if (bio == NULL)
447 		RETURN_FALSE;
448 
449 	/* Return last error message. */
450 	message = intl_error_get_message(BREAKITER_ERROR_P(bio) TSRMLS_CC);
451 	RETURN_STRING(message, 0);
452 }
453