1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <unicode/brkiter.h>
22 #include "codepointiterator_internal.h"
23 
24 #include "breakiterator_iterators.h"
25 
26 extern "C" {
27 #include "../php_intl.h"
28 #define USE_BREAKITERATOR_POINTER 1
29 #include "breakiterator_class.h"
30 #include "../locale/locale.h"
31 #include <zend_exceptions.h>
32 }
33 
34 using PHP::CodePointBreakIterator;
35 using icu::BreakIterator;
36 using icu::Locale;
37 
PHP_METHOD(BreakIterator,__construct)38 U_CFUNC PHP_METHOD(BreakIterator, __construct)
39 {
40 	zend_throw_exception( NULL,
41 		"An object of this type cannot be created with the new operator",
42 		0 );
43 }
44 
_breakiter_factory(const char * func_name,BreakIterator * (* func)(const Locale &,UErrorCode &),INTERNAL_FUNCTION_PARAMETERS)45 static void _breakiter_factory(const char *func_name,
46 							   BreakIterator *(*func)(const Locale&, UErrorCode&),
47 							   INTERNAL_FUNCTION_PARAMETERS)
48 {
49 	BreakIterator	*biter;
50 	const char		*locale_str = NULL;
51 	size_t				dummy;
52 	char			*msg;
53 	UErrorCode		status = UErrorCode();
54 	intl_error_reset(NULL);
55 
56 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!",
57 			&locale_str, &dummy) == FAILURE) {
58 		spprintf(&msg, 0, "%s: bad arguments", func_name);
59 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
60 		efree(msg);
61 		RETURN_NULL();
62 	}
63 
64 	if (locale_str == NULL) {
65 		locale_str = intl_locale_get_default();
66 	}
67 
68 	biter = func(Locale::createFromName(locale_str), status);
69 	intl_error_set_code(NULL, status);
70 	if (U_FAILURE(status)) {
71 		spprintf(&msg, 0, "%s: error creating BreakIterator",
72 				func_name);
73 		intl_error_set_custom_msg(NULL, msg, 1);
74 		efree(msg);
75 		RETURN_NULL();
76 	}
77 
78 	breakiterator_object_create(return_value, biter, 1);
79 }
80 
PHP_FUNCTION(breakiter_create_word_instance)81 U_CFUNC PHP_FUNCTION(breakiter_create_word_instance)
82 {
83 	_breakiter_factory("breakiter_create_word_instance",
84 			&BreakIterator::createWordInstance,
85 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
86 }
87 
PHP_FUNCTION(breakiter_create_line_instance)88 U_CFUNC PHP_FUNCTION(breakiter_create_line_instance)
89 {
90 	_breakiter_factory("breakiter_create_line_instance",
91 			&BreakIterator::createLineInstance,
92 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
93 }
94 
PHP_FUNCTION(breakiter_create_character_instance)95 U_CFUNC PHP_FUNCTION(breakiter_create_character_instance)
96 {
97 	_breakiter_factory("breakiter_create_character_instance",
98 			&BreakIterator::createCharacterInstance,
99 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
100 }
101 
PHP_FUNCTION(breakiter_create_sentence_instance)102 U_CFUNC PHP_FUNCTION(breakiter_create_sentence_instance)
103 {
104 	_breakiter_factory("breakiter_create_sentence_instance",
105 			&BreakIterator::createSentenceInstance,
106 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
107 }
108 
PHP_FUNCTION(breakiter_create_title_instance)109 U_CFUNC PHP_FUNCTION(breakiter_create_title_instance)
110 {
111 	_breakiter_factory("breakiter_create_title_instance",
112 			&BreakIterator::createTitleInstance,
113 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
114 }
115 
PHP_FUNCTION(breakiter_create_code_point_instance)116 U_CFUNC PHP_FUNCTION(breakiter_create_code_point_instance)
117 {
118 	intl_error_reset(NULL);
119 
120 	if (zend_parse_parameters_none() == FAILURE) {
121 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
122 			"breakiter_create_code_point_instance: bad arguments", 0);
123 		RETURN_NULL();
124 	}
125 
126 	CodePointBreakIterator *cpbi = new CodePointBreakIterator();
127 	breakiterator_object_create(return_value, cpbi, 1);
128 }
129 
PHP_FUNCTION(breakiter_get_text)130 U_CFUNC PHP_FUNCTION(breakiter_get_text)
131 {
132 	BREAKITER_METHOD_INIT_VARS;
133 	object = ZEND_THIS;
134 
135 	if (zend_parse_parameters_none() == FAILURE) {
136 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
137 			"breakiter_get_text: bad arguments", 0);
138 		RETURN_FALSE;
139 	}
140 
141 	BREAKITER_METHOD_FETCH_OBJECT;
142 
143 	if (Z_ISUNDEF(bio->text)) {
144 		RETURN_NULL();
145 	} else {
146 		ZVAL_COPY(return_value, &bio->text);
147 	}
148 }
149 
PHP_FUNCTION(breakiter_set_text)150 U_CFUNC PHP_FUNCTION(breakiter_set_text)
151 {
152 	UText	*ut = NULL;
153 	zend_string	*text;
154 	BREAKITER_METHOD_INIT_VARS;
155 	object = ZEND_THIS;
156 
157 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &text) == FAILURE) {
158 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
159 			"breakiter_set_text: bad arguments", 0);
160 		RETURN_FALSE;
161 	}
162 
163 	BREAKITER_METHOD_FETCH_OBJECT;
164 
165 	ut = utext_openUTF8(ut, ZSTR_VAL(text), ZSTR_LEN(text), BREAKITER_ERROR_CODE_P(bio));
166 	INTL_METHOD_CHECK_STATUS_OR_NULL(bio, "breakiter_set_text: error opening UText");
167 
168 	bio->biter->setText(ut, BREAKITER_ERROR_CODE(bio));
169 	utext_close(ut); /* ICU shallow clones the UText */
170 	INTL_METHOD_CHECK_STATUS_OR_NULL(bio, "breakiter_set_text: error calling "
171 		"BreakIterator::setText()");
172 
173 	/* When ICU clones the UText, it does not copy the buffer, so we have to
174 	 * keep the string buffer around by holding a reference to its zval. This
175 	 * also allows a faste implementation of getText() */
176 	zval_ptr_dtor(&bio->text);
177 	ZVAL_STR_COPY(&bio->text, text);
178 
179 	RETURN_TRUE;
180 }
181 
_breakiter_no_args_ret_int32(const char * func_name,int32_t (BreakIterator::* func)(),INTERNAL_FUNCTION_PARAMETERS)182 static void _breakiter_no_args_ret_int32(
183 		const char *func_name,
184 		int32_t (BreakIterator::*func)(),
185 		INTERNAL_FUNCTION_PARAMETERS)
186 {
187 	char	*msg;
188 	BREAKITER_METHOD_INIT_VARS;
189 	object = ZEND_THIS;
190 
191 	if (zend_parse_parameters_none() == FAILURE) {
192 		spprintf(&msg, 0, "%s: bad arguments", func_name);
193 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
194 		efree(msg);
195 		RETURN_FALSE;
196 	}
197 
198 	BREAKITER_METHOD_FETCH_OBJECT;
199 
200 	int32_t res = (bio->biter->*func)();
201 
202 	RETURN_LONG((zend_long)res);
203 }
204 
_breakiter_int32_ret_int32(const char * func_name,int32_t (BreakIterator::* func)(int32_t),INTERNAL_FUNCTION_PARAMETERS)205 static void _breakiter_int32_ret_int32(
206 		const char *func_name,
207 		int32_t (BreakIterator::*func)(int32_t),
208 		INTERNAL_FUNCTION_PARAMETERS)
209 {
210 	char	*msg;
211 	zend_long	arg;
212 	BREAKITER_METHOD_INIT_VARS;
213 	object = ZEND_THIS;
214 
215 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &arg) == FAILURE) {
216 		spprintf(&msg, 0, "%s: bad arguments", func_name);
217 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
218 		efree(msg);
219 		RETURN_FALSE;
220 	}
221 
222 	BREAKITER_METHOD_FETCH_OBJECT;
223 
224 	if (arg < INT32_MIN || arg > INT32_MAX) {
225 		spprintf(&msg, 0, "%s: offset argument is outside bounds of "
226 				"a 32-bit wide integer", func_name);
227 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1);
228 		efree(msg);
229 		RETURN_FALSE;
230 	}
231 
232 	int32_t res = (bio->biter->*func)((int32_t)arg);
233 
234 	RETURN_LONG((zend_long)res);
235 }
236 
PHP_FUNCTION(breakiter_first)237 U_CFUNC PHP_FUNCTION(breakiter_first)
238 {
239 	_breakiter_no_args_ret_int32("breakiter_first",
240 			&BreakIterator::first,
241 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
242 }
243 
PHP_FUNCTION(breakiter_last)244 U_CFUNC PHP_FUNCTION(breakiter_last)
245 {
246 	_breakiter_no_args_ret_int32("breakiter_last",
247 			&BreakIterator::last,
248 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
249 }
250 
PHP_FUNCTION(breakiter_previous)251 U_CFUNC PHP_FUNCTION(breakiter_previous)
252 {
253 	_breakiter_no_args_ret_int32("breakiter_previous",
254 			&BreakIterator::previous,
255 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
256 }
257 
PHP_FUNCTION(breakiter_next)258 U_CFUNC PHP_FUNCTION(breakiter_next)
259 {
260 	bool no_arg_version = false;
261 
262 	if (ZEND_NUM_ARGS() == 0) {
263 		no_arg_version = true;
264 	} else if (ZEND_NUM_ARGS() == 1) {
265 		zval *arg;
266 		int res = zend_parse_parameters(ZEND_NUM_ARGS(), "z", &arg);
267 		assert(res == SUCCESS);
268 		if (Z_TYPE_P(arg) == IS_NULL) {
269 			no_arg_version = true;
270 			ZEND_NUM_ARGS() = 0; /* pretend we don't have any argument */
271 		} else {
272 			no_arg_version = false;
273 		}
274 	}
275 
276 	if (no_arg_version) {
277 		_breakiter_no_args_ret_int32("breakiter_next",
278 				&BreakIterator::next,
279 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
280 	} else {
281 		_breakiter_int32_ret_int32("breakiter_next",
282 				&BreakIterator::next,
283 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
284 	}
285 }
286 
PHP_FUNCTION(breakiter_current)287 U_CFUNC PHP_FUNCTION(breakiter_current)
288 {
289 	BREAKITER_METHOD_INIT_VARS;
290 	object = ZEND_THIS;
291 
292 	if (zend_parse_parameters_none() == FAILURE) {
293 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
294 				"breakiter_current: bad arguments", 0);
295 		RETURN_FALSE;
296 	}
297 
298 	BREAKITER_METHOD_FETCH_OBJECT;
299 
300 	int32_t res = bio->biter->current();
301 
302 	RETURN_LONG((zend_long)res);
303 }
304 
PHP_FUNCTION(breakiter_following)305 U_CFUNC PHP_FUNCTION(breakiter_following)
306 {
307 	_breakiter_int32_ret_int32("breakiter_following",
308 			&BreakIterator::following,
309 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
310 }
311 
PHP_FUNCTION(breakiter_preceding)312 U_CFUNC PHP_FUNCTION(breakiter_preceding)
313 {
314 	_breakiter_int32_ret_int32("breakiter_preceding",
315 			&BreakIterator::preceding,
316 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
317 }
318 
PHP_FUNCTION(breakiter_is_boundary)319 U_CFUNC PHP_FUNCTION(breakiter_is_boundary)
320 {
321 	zend_long offset;
322 	BREAKITER_METHOD_INIT_VARS;
323 	object = ZEND_THIS;
324 
325 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l",
326 			&offset) == FAILURE) {
327 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
328 				"breakiter_is_boundary: bad arguments", 0);
329 		RETURN_FALSE;
330 	}
331 
332 	if (offset < INT32_MIN || offset > INT32_MAX) {
333 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
334 				"breakiter_is_boundary: offset argument is outside bounds of "
335 				"a 32-bit wide integer", 0);
336 		RETURN_FALSE;
337 	}
338 
339 	BREAKITER_METHOD_FETCH_OBJECT;
340 
341 	UBool res = bio->biter->isBoundary((int32_t)offset);
342 
343 	RETURN_BOOL((zend_long)res);
344 }
345 
PHP_FUNCTION(breakiter_get_locale)346 U_CFUNC PHP_FUNCTION(breakiter_get_locale)
347 {
348 	zend_long	locale_type;
349 	BREAKITER_METHOD_INIT_VARS;
350 	object = ZEND_THIS;
351 
352 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &locale_type) == FAILURE) {
353 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
354 			"breakiter_get_locale: bad arguments", 0);
355 		RETURN_FALSE;
356 	}
357 
358 	if (locale_type != ULOC_ACTUAL_LOCALE && locale_type != ULOC_VALID_LOCALE) {
359 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
360 			"breakiter_get_locale: invalid locale type", 0);
361 		RETURN_FALSE;
362 	}
363 
364 	BREAKITER_METHOD_FETCH_OBJECT;
365 
366 	Locale locale = bio->biter->getLocale((ULocDataLocaleType)locale_type,
367 		BREAKITER_ERROR_CODE(bio));
368 	INTL_METHOD_CHECK_STATUS(bio,
369 		"breakiter_get_locale: Call to ICU method has failed");
370 
371 	RETURN_STRING(locale.getName());
372 }
373 
PHP_FUNCTION(breakiter_get_parts_iterator)374 U_CFUNC PHP_FUNCTION(breakiter_get_parts_iterator)
375 {
376 	zend_long key_type = 0;
377 	BREAKITER_METHOD_INIT_VARS;
378 	object = ZEND_THIS;
379 
380 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|l", &key_type) == FAILURE) {
381 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
382 			"breakiter_get_parts_iterator: bad arguments", 0);
383 		RETURN_FALSE;
384 	}
385 
386 	if (key_type != PARTS_ITERATOR_KEY_SEQUENTIAL
387 			&& key_type != PARTS_ITERATOR_KEY_LEFT
388 			&& key_type != PARTS_ITERATOR_KEY_RIGHT) {
389 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
390 			"breakiter_get_parts_iterator: bad key type", 0);
391 		RETURN_FALSE;
392 	}
393 
394 	BREAKITER_METHOD_FETCH_OBJECT;
395 
396 	IntlIterator_from_BreakIterator_parts(
397 		object, return_value, (parts_iter_key_type)key_type);
398 }
399 
PHP_FUNCTION(breakiter_get_error_code)400 U_CFUNC PHP_FUNCTION(breakiter_get_error_code)
401 {
402 	BREAKITER_METHOD_INIT_VARS;
403 	object = ZEND_THIS;
404 
405 	if (zend_parse_parameters_none() == FAILURE) {
406 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
407 			"breakiter_get_error_code: bad arguments", 0);
408 		RETURN_FALSE;
409 	}
410 
411 	/* Fetch the object (without resetting its last error code ). */
412 	bio = Z_INTL_BREAKITERATOR_P(object);
413 	if (bio == NULL)
414 		RETURN_FALSE;
415 
416 	RETURN_LONG((zend_long)BREAKITER_ERROR_CODE(bio));
417 }
418 
PHP_FUNCTION(breakiter_get_error_message)419 U_CFUNC PHP_FUNCTION(breakiter_get_error_message)
420 {
421 	zend_string* message = NULL;
422 	BREAKITER_METHOD_INIT_VARS;
423 	object = ZEND_THIS;
424 
425 	if (zend_parse_parameters_none() == FAILURE) {
426 		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
427 			"breakiter_get_error_message: bad arguments", 0 );
428 		RETURN_FALSE;
429 	}
430 
431 
432 	/* Fetch the object (without resetting its last error code ). */
433 	bio = Z_INTL_BREAKITERATOR_P(object);
434 	if (bio == NULL)
435 		RETURN_FALSE;
436 
437 	/* Return last error message. */
438 	message = intl_error_get_message(BREAKITER_ERROR_P(bio));
439 	RETURN_STR(message);
440 }
441