1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <unicode/brkiter.h>
22 #include "codepointiterator_internal.h"
23 
24 #include "breakiterator_iterators.h"
25 
26 extern "C" {
27 #include "../php_intl.h"
28 #define USE_BREAKITERATOR_POINTER 1
29 #include "breakiterator_class.h"
30 #include "../locale/locale.h"
31 #include <zend_exceptions.h>
32 }
33 
34 using PHP::CodePointBreakIterator;
35 
PHP_METHOD(BreakIterator,__construct)36 U_CFUNC PHP_METHOD(BreakIterator, __construct)
37 {
38 	zend_throw_exception( NULL,
39 		"An object of this type cannot be created with the new operator",
40 		0 );
41 }
42 
_breakiter_factory(const char * func_name,BreakIterator * (* func)(const Locale &,UErrorCode &),INTERNAL_FUNCTION_PARAMETERS)43 static void _breakiter_factory(const char *func_name,
44 							   BreakIterator *(*func)(const Locale&, UErrorCode&),
45 							   INTERNAL_FUNCTION_PARAMETERS)
46 {
47 	BreakIterator	*biter;
48 	const char		*locale_str = NULL;
49 	size_t				dummy;
50 	char			*msg;
51 	UErrorCode		status = UErrorCode();
52 	intl_error_reset(NULL);
53 
54 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!",
55 			&locale_str, &dummy) == FAILURE) {
56 		spprintf(&msg, 0, "%s: bad arguments", func_name);
57 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
58 		efree(msg);
59 		RETURN_NULL();
60 	}
61 
62 	if (locale_str == NULL) {
63 		locale_str = intl_locale_get_default();
64 	}
65 
66 	biter = func(Locale::createFromName(locale_str), status);
67 	intl_error_set_code(NULL, status);
68 	if (U_FAILURE(status)) {
69 		spprintf(&msg, 0, "%s: error creating BreakIterator",
70 				func_name);
71 		intl_error_set_custom_msg(NULL, msg, 1);
72 		efree(msg);
73 		RETURN_NULL();
74 	}
75 
76 	breakiterator_object_create(return_value, biter, 1);
77 }
78 
PHP_FUNCTION(breakiter_create_word_instance)79 U_CFUNC PHP_FUNCTION(breakiter_create_word_instance)
80 {
81 	_breakiter_factory("breakiter_create_word_instance",
82 			&BreakIterator::createWordInstance,
83 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
84 }
85 
PHP_FUNCTION(breakiter_create_line_instance)86 U_CFUNC PHP_FUNCTION(breakiter_create_line_instance)
87 {
88 	_breakiter_factory("breakiter_create_line_instance",
89 			&BreakIterator::createLineInstance,
90 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
91 }
92 
PHP_FUNCTION(breakiter_create_character_instance)93 U_CFUNC PHP_FUNCTION(breakiter_create_character_instance)
94 {
95 	_breakiter_factory("breakiter_create_character_instance",
96 			&BreakIterator::createCharacterInstance,
97 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
98 }
99 
PHP_FUNCTION(breakiter_create_sentence_instance)100 U_CFUNC PHP_FUNCTION(breakiter_create_sentence_instance)
101 {
102 	_breakiter_factory("breakiter_create_sentence_instance",
103 			&BreakIterator::createSentenceInstance,
104 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
105 }
106 
PHP_FUNCTION(breakiter_create_title_instance)107 U_CFUNC PHP_FUNCTION(breakiter_create_title_instance)
108 {
109 	_breakiter_factory("breakiter_create_title_instance",
110 			&BreakIterator::createTitleInstance,
111 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
112 }
113 
PHP_FUNCTION(breakiter_create_code_point_instance)114 U_CFUNC PHP_FUNCTION(breakiter_create_code_point_instance)
115 {
116 	UErrorCode status = UErrorCode();
117 	intl_error_reset(NULL);
118 
119 	if (zend_parse_parameters_none() == FAILURE) {
120 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
121 			"breakiter_create_code_point_instance: bad arguments", 0);
122 		RETURN_NULL();
123 	}
124 
125 	CodePointBreakIterator *cpbi = new CodePointBreakIterator();
126 	breakiterator_object_create(return_value, cpbi, 1);
127 }
128 
PHP_FUNCTION(breakiter_get_text)129 U_CFUNC PHP_FUNCTION(breakiter_get_text)
130 {
131 	BREAKITER_METHOD_INIT_VARS;
132 	object = getThis();
133 
134 	if (zend_parse_parameters_none() == FAILURE) {
135 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
136 			"breakiter_get_text: bad arguments", 0);
137 		RETURN_FALSE;
138 	}
139 
140 	BREAKITER_METHOD_FETCH_OBJECT;
141 
142 	if (Z_ISUNDEF(bio->text)) {
143 		RETURN_NULL();
144 	} else {
145 		ZVAL_COPY(return_value, &bio->text);
146 	}
147 }
148 
PHP_FUNCTION(breakiter_set_text)149 U_CFUNC PHP_FUNCTION(breakiter_set_text)
150 {
151 	UText	*ut = NULL;
152 	zend_string	*text;
153 	BREAKITER_METHOD_INIT_VARS;
154 	object = getThis();
155 
156 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &text) == FAILURE) {
157 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
158 			"breakiter_set_text: bad arguments", 0);
159 		RETURN_FALSE;
160 	}
161 
162 	BREAKITER_METHOD_FETCH_OBJECT;
163 
164 	ut = utext_openUTF8(ut, ZSTR_VAL(text), ZSTR_LEN(text), BREAKITER_ERROR_CODE_P(bio));
165 	INTL_METHOD_CHECK_STATUS_OR_NULL(bio, "breakiter_set_text: error opening UText");
166 
167 	bio->biter->setText(ut, BREAKITER_ERROR_CODE(bio));
168 	utext_close(ut); /* ICU shallow clones the UText */
169 	INTL_METHOD_CHECK_STATUS_OR_NULL(bio, "breakiter_set_text: error calling "
170 		"BreakIterator::setText()");
171 
172 	/* When ICU clones the UText, it does not copy the buffer, so we have to
173 	 * keep the string buffer around by holding a reference to its zval. This
174 	 * also allows a faste implementation of getText() */
175 	zval_ptr_dtor(&bio->text);
176 	ZVAL_STR_COPY(&bio->text, text);
177 
178 	RETURN_TRUE;
179 }
180 
_breakiter_no_args_ret_int32(const char * func_name,int32_t (BreakIterator::* func)(),INTERNAL_FUNCTION_PARAMETERS)181 static void _breakiter_no_args_ret_int32(
182 		const char *func_name,
183 		int32_t (BreakIterator::*func)(),
184 		INTERNAL_FUNCTION_PARAMETERS)
185 {
186 	char	*msg;
187 	BREAKITER_METHOD_INIT_VARS;
188 	object = getThis();
189 
190 	if (zend_parse_parameters_none() == FAILURE) {
191 		spprintf(&msg, 0, "%s: bad arguments", func_name);
192 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
193 		efree(msg);
194 		RETURN_FALSE;
195 	}
196 
197 	BREAKITER_METHOD_FETCH_OBJECT;
198 
199 	int32_t res = (bio->biter->*func)();
200 
201 	RETURN_LONG((zend_long)res);
202 }
203 
_breakiter_int32_ret_int32(const char * func_name,int32_t (BreakIterator::* func)(int32_t),INTERNAL_FUNCTION_PARAMETERS)204 static void _breakiter_int32_ret_int32(
205 		const char *func_name,
206 		int32_t (BreakIterator::*func)(int32_t),
207 		INTERNAL_FUNCTION_PARAMETERS)
208 {
209 	char	*msg;
210 	zend_long	arg;
211 	BREAKITER_METHOD_INIT_VARS;
212 	object = getThis();
213 
214 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &arg) == FAILURE) {
215 		spprintf(&msg, 0, "%s: bad arguments", func_name);
216 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
217 		efree(msg);
218 		RETURN_FALSE;
219 	}
220 
221 	BREAKITER_METHOD_FETCH_OBJECT;
222 
223 	if (arg < INT32_MIN || arg > INT32_MAX) {
224 		spprintf(&msg, 0, "%s: offset argument is outside bounds of "
225 				"a 32-bit wide integer", func_name);
226 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
227 		efree(msg);
228 		RETURN_FALSE;
229 	}
230 
231 	int32_t res = (bio->biter->*func)((int32_t)arg);
232 
233 	RETURN_LONG((zend_long)res);
234 }
235 
PHP_FUNCTION(breakiter_first)236 U_CFUNC PHP_FUNCTION(breakiter_first)
237 {
238 	_breakiter_no_args_ret_int32("breakiter_first",
239 			&BreakIterator::first,
240 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
241 }
242 
PHP_FUNCTION(breakiter_last)243 U_CFUNC PHP_FUNCTION(breakiter_last)
244 {
245 	_breakiter_no_args_ret_int32("breakiter_last",
246 			&BreakIterator::last,
247 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
248 }
249 
PHP_FUNCTION(breakiter_previous)250 U_CFUNC PHP_FUNCTION(breakiter_previous)
251 {
252 	_breakiter_no_args_ret_int32("breakiter_previous",
253 			&BreakIterator::previous,
254 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
255 }
256 
PHP_FUNCTION(breakiter_next)257 U_CFUNC PHP_FUNCTION(breakiter_next)
258 {
259 	bool no_arg_version = false;
260 
261 	if (ZEND_NUM_ARGS() == 0) {
262 		no_arg_version = true;
263 	} else if (ZEND_NUM_ARGS() == 1) {
264 		zval *arg;
265 		int res = zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg);
266 		assert(res == SUCCESS);
267 		if (Z_TYPE_P(arg) == IS_NULL) {
268 			no_arg_version = true;
269 			ZEND_NUM_ARGS() = 0; /* pretend we don't have any argument */
270 		} else {
271 			no_arg_version = false;
272 		}
273 	}
274 
275 	if (no_arg_version) {
276 		_breakiter_no_args_ret_int32("breakiter_next",
277 				&BreakIterator::next,
278 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
279 	} else {
280 		_breakiter_int32_ret_int32("breakiter_next",
281 				&BreakIterator::next,
282 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
283 	}
284 }
285 
PHP_FUNCTION(breakiter_current)286 U_CFUNC PHP_FUNCTION(breakiter_current)
287 {
288 	BREAKITER_METHOD_INIT_VARS;
289 	object = getThis();
290 
291 	if (zend_parse_parameters_none() == FAILURE) {
292 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
293 				"breakiter_current: bad arguments", 0);
294 		RETURN_FALSE;
295 	}
296 
297 	BREAKITER_METHOD_FETCH_OBJECT;
298 
299 	int32_t res = bio->biter->current();
300 
301 	RETURN_LONG((zend_long)res);
302 }
303 
PHP_FUNCTION(breakiter_following)304 U_CFUNC PHP_FUNCTION(breakiter_following)
305 {
306 	_breakiter_int32_ret_int32("breakiter_following",
307 			&BreakIterator::following,
308 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
309 }
310 
PHP_FUNCTION(breakiter_preceding)311 U_CFUNC PHP_FUNCTION(breakiter_preceding)
312 {
313 	_breakiter_int32_ret_int32("breakiter_preceding",
314 			&BreakIterator::preceding,
315 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
316 }
317 
PHP_FUNCTION(breakiter_is_boundary)318 U_CFUNC PHP_FUNCTION(breakiter_is_boundary)
319 {
320 	zend_long offset;
321 	BREAKITER_METHOD_INIT_VARS;
322 	object = getThis();
323 
324 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l",
325 			&offset) == FAILURE) {
326 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
327 				"breakiter_is_boundary: bad arguments", 0);
328 		RETURN_FALSE;
329 	}
330 
331 	if (offset < INT32_MIN || offset > INT32_MAX) {
332 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
333 				"breakiter_is_boundary: offset argument is outside bounds of "
334 				"a 32-bit wide integer", 0);
335 		RETURN_FALSE;
336 	}
337 
338 	BREAKITER_METHOD_FETCH_OBJECT;
339 
340 	UBool res = bio->biter->isBoundary((int32_t)offset);
341 
342 	RETURN_BOOL((zend_long)res);
343 }
344 
PHP_FUNCTION(breakiter_get_locale)345 U_CFUNC PHP_FUNCTION(breakiter_get_locale)
346 {
347 	zend_long	locale_type;
348 	BREAKITER_METHOD_INIT_VARS;
349 	object = getThis();
350 
351 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &locale_type) == FAILURE) {
352 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
353 			"breakiter_get_locale: bad arguments", 0);
354 		RETURN_FALSE;
355 	}
356 
357 	if (locale_type != ULOC_ACTUAL_LOCALE && locale_type != ULOC_VALID_LOCALE) {
358 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
359 			"breakiter_get_locale: invalid locale type", 0);
360 		RETURN_FALSE;
361 	}
362 
363 	BREAKITER_METHOD_FETCH_OBJECT;
364 
365 	Locale locale = bio->biter->getLocale((ULocDataLocaleType)locale_type,
366 		BREAKITER_ERROR_CODE(bio));
367 	INTL_METHOD_CHECK_STATUS(bio,
368 		"breakiter_get_locale: Call to ICU method has failed");
369 
370 	RETURN_STRING(locale.getName());
371 }
372 
PHP_FUNCTION(breakiter_get_parts_iterator)373 U_CFUNC PHP_FUNCTION(breakiter_get_parts_iterator)
374 {
375 	zend_long key_type = 0;
376 	BREAKITER_METHOD_INIT_VARS;
377 	object = getThis();
378 
379 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|l", &key_type) == FAILURE) {
380 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
381 			"breakiter_get_parts_iterator: bad arguments", 0);
382 		RETURN_FALSE;
383 	}
384 
385 	if (key_type != PARTS_ITERATOR_KEY_SEQUENTIAL
386 			&& key_type != PARTS_ITERATOR_KEY_LEFT
387 			&& key_type != PARTS_ITERATOR_KEY_RIGHT) {
388 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
389 			"breakiter_get_parts_iterator: bad key type", 0);
390 		RETURN_FALSE;
391 	}
392 
393 	BREAKITER_METHOD_FETCH_OBJECT;
394 
395 	IntlIterator_from_BreakIterator_parts(
396 		object, return_value, (parts_iter_key_type)key_type);
397 }
398 
PHP_FUNCTION(breakiter_get_error_code)399 U_CFUNC PHP_FUNCTION(breakiter_get_error_code)
400 {
401 	BREAKITER_METHOD_INIT_VARS;
402 	object = getThis();
403 
404 	if (zend_parse_parameters_none() == FAILURE) {
405 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
406 			"breakiter_get_error_code: bad arguments", 0);
407 		RETURN_FALSE;
408 	}
409 
410 	/* Fetch the object (without resetting its last error code ). */
411 	bio = Z_INTL_BREAKITERATOR_P(object);
412 	if (bio == NULL)
413 		RETURN_FALSE;
414 
415 	RETURN_LONG((zend_long)BREAKITER_ERROR_CODE(bio));
416 }
417 
PHP_FUNCTION(breakiter_get_error_message)418 U_CFUNC PHP_FUNCTION(breakiter_get_error_message)
419 {
420 	zend_string* message = NULL;
421 	BREAKITER_METHOD_INIT_VARS;
422 	object = getThis();
423 
424 	if (zend_parse_parameters_none() == FAILURE) {
425 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
426 			"breakiter_get_error_message: bad arguments", 0 );
427 		RETURN_FALSE;
428 	}
429 
430 
431 	/* Fetch the object (without resetting its last error code ). */
432 	bio = Z_INTL_BREAKITERATOR_P(object);
433 	if (bio == NULL)
434 		RETURN_FALSE;
435 
436 	/* Return last error message. */
437 	message = intl_error_get_message(BREAKITER_ERROR_P(bio));
438 	RETURN_STR(message);
439 }
440