1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13 */
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <unicode/brkiter.h>
20 #include "codepointiterator_internal.h"
21 
22 #include "breakiterator_iterators.h"
23 
24 extern "C" {
25 #include "../php_intl.h"
26 #define USE_BREAKITERATOR_POINTER 1
27 #include "breakiterator_class.h"
28 #include "../locale/locale.h"
29 #include <zend_exceptions.h>
30 #include <zend_interfaces.h>
31 }
32 
33 using PHP::CodePointBreakIterator;
34 using icu::BreakIterator;
35 using icu::Locale;
36 
PHP_METHOD(IntlBreakIterator,__construct)37 U_CFUNC PHP_METHOD(IntlBreakIterator, __construct)
38 {
39 	zend_throw_exception( NULL,
40 		"An object of this type cannot be created with the new operator",
41 		0 );
42 }
43 
_breakiter_factory(const char * func_name,BreakIterator * (* func)(const Locale &,UErrorCode &),INTERNAL_FUNCTION_PARAMETERS)44 static void _breakiter_factory(const char *func_name,
45 							   BreakIterator *(*func)(const Locale&, UErrorCode&),
46 							   INTERNAL_FUNCTION_PARAMETERS)
47 {
48 	BreakIterator	*biter;
49 	const char		*locale_str = NULL;
50 	size_t				dummy;
51 	char			*msg;
52 	UErrorCode		status = UErrorCode();
53 	intl_error_reset(NULL);
54 
55 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!",
56 			&locale_str, &dummy) == FAILURE) {
57 		RETURN_THROWS();
58 	}
59 
60 	if (locale_str == NULL) {
61 		locale_str = intl_locale_get_default();
62 	}
63 
64 	biter = func(Locale::createFromName(locale_str), status);
65 	intl_error_set_code(NULL, status);
66 	if (U_FAILURE(status)) {
67 		spprintf(&msg, 0, "%s: error creating BreakIterator",
68 				func_name);
69 		intl_error_set_custom_msg(NULL, msg, 1);
70 		efree(msg);
71 		RETURN_NULL();
72 	}
73 
74 	breakiterator_object_create(return_value, biter, 1);
75 }
76 
PHP_METHOD(IntlBreakIterator,createWordInstance)77 U_CFUNC PHP_METHOD(IntlBreakIterator, createWordInstance)
78 {
79 	_breakiter_factory("breakiter_create_word_instance",
80 			&BreakIterator::createWordInstance,
81 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
82 }
83 
PHP_METHOD(IntlBreakIterator,createLineInstance)84 U_CFUNC PHP_METHOD(IntlBreakIterator, createLineInstance)
85 {
86 	_breakiter_factory("breakiter_create_line_instance",
87 			&BreakIterator::createLineInstance,
88 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
89 }
90 
PHP_METHOD(IntlBreakIterator,createCharacterInstance)91 U_CFUNC PHP_METHOD(IntlBreakIterator, createCharacterInstance)
92 {
93 	_breakiter_factory("breakiter_create_character_instance",
94 			&BreakIterator::createCharacterInstance,
95 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
96 }
97 
PHP_METHOD(IntlBreakIterator,createSentenceInstance)98 U_CFUNC PHP_METHOD(IntlBreakIterator, createSentenceInstance)
99 {
100 	_breakiter_factory("breakiter_create_sentence_instance",
101 			&BreakIterator::createSentenceInstance,
102 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
103 }
104 
PHP_METHOD(IntlBreakIterator,createTitleInstance)105 U_CFUNC PHP_METHOD(IntlBreakIterator, createTitleInstance)
106 {
107 	_breakiter_factory("breakiter_create_title_instance",
108 			&BreakIterator::createTitleInstance,
109 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
110 }
111 
PHP_METHOD(IntlBreakIterator,createCodePointInstance)112 U_CFUNC PHP_METHOD(IntlBreakIterator, createCodePointInstance)
113 {
114 	intl_error_reset(NULL);
115 
116 	if (zend_parse_parameters_none() == FAILURE) {
117 		RETURN_THROWS();
118 	}
119 
120 	CodePointBreakIterator *cpbi = new CodePointBreakIterator();
121 	breakiterator_object_create(return_value, cpbi, 1);
122 }
123 
PHP_METHOD(IntlBreakIterator,getText)124 U_CFUNC PHP_METHOD(IntlBreakIterator, getText)
125 {
126 	BREAKITER_METHOD_INIT_VARS;
127 	object = ZEND_THIS;
128 
129 	if (zend_parse_parameters_none() == FAILURE) {
130 		RETURN_THROWS();
131 	}
132 
133 	BREAKITER_METHOD_FETCH_OBJECT;
134 
135 	if (Z_ISUNDEF(bio->text)) {
136 		RETURN_NULL();
137 	} else {
138 		ZVAL_COPY(return_value, &bio->text);
139 	}
140 }
141 
PHP_METHOD(IntlBreakIterator,setText)142 U_CFUNC PHP_METHOD(IntlBreakIterator, setText)
143 {
144 	UText	*ut = NULL;
145 	zend_string	*text;
146 	BREAKITER_METHOD_INIT_VARS;
147 	object = ZEND_THIS;
148 
149 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &text) == FAILURE) {
150 		RETURN_THROWS();
151 	}
152 
153 	BREAKITER_METHOD_FETCH_OBJECT;
154 
155 	ut = utext_openUTF8(ut, ZSTR_VAL(text), ZSTR_LEN(text), BREAKITER_ERROR_CODE_P(bio));
156 	INTL_METHOD_CHECK_STATUS(bio, "breakiter_set_text: error opening UText");
157 
158 	bio->biter->setText(ut, BREAKITER_ERROR_CODE(bio));
159 	utext_close(ut); /* ICU shallow clones the UText */
160 	INTL_METHOD_CHECK_STATUS(bio, "breakiter_set_text: error calling "
161 		"BreakIterator::setText()");
162 
163 	/* When ICU clones the UText, it does not copy the buffer, so we have to
164 	 * keep the string buffer around by holding a reference to its zval. This
165 	 * also allows a faste implementation of getText() */
166 	zval_ptr_dtor(&bio->text);
167 	ZVAL_STR_COPY(&bio->text, text);
168 
169 	RETURN_TRUE;
170 }
171 
_breakiter_no_args_ret_int32(int32_t (BreakIterator::* func)(),INTERNAL_FUNCTION_PARAMETERS)172 static void _breakiter_no_args_ret_int32(
173 		int32_t (BreakIterator::*func)(),
174 		INTERNAL_FUNCTION_PARAMETERS)
175 {
176 	BREAKITER_METHOD_INIT_VARS;
177 	object = ZEND_THIS;
178 
179 	if (zend_parse_parameters_none() == FAILURE) {
180 		RETURN_THROWS();
181 	}
182 
183 	BREAKITER_METHOD_FETCH_OBJECT;
184 
185 	int32_t res = (bio->biter->*func)();
186 
187 	RETURN_LONG((zend_long)res);
188 }
189 
_breakiter_int32_ret_int32(int32_t (BreakIterator::* func)(int32_t),INTERNAL_FUNCTION_PARAMETERS)190 static void _breakiter_int32_ret_int32(
191 		int32_t (BreakIterator::*func)(int32_t),
192 		INTERNAL_FUNCTION_PARAMETERS)
193 {
194 	zend_long	arg;
195 	BREAKITER_METHOD_INIT_VARS;
196 	object = ZEND_THIS;
197 
198 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &arg) == FAILURE) {
199 		RETURN_THROWS();
200 	}
201 
202 	BREAKITER_METHOD_FETCH_OBJECT;
203 
204 	if (UNEXPECTED(arg < INT32_MIN || arg > INT32_MAX)) {
205 		zend_argument_value_error(1, "must be between %d and %d", INT32_MIN, INT32_MAX);
206 		RETURN_THROWS();
207 	}
208 
209 	int32_t res = (bio->biter->*func)((int32_t)arg);
210 
211 	RETURN_LONG((zend_long)res);
212 }
213 
PHP_METHOD(IntlBreakIterator,first)214 U_CFUNC PHP_METHOD(IntlBreakIterator, first)
215 {
216 	_breakiter_no_args_ret_int32(&BreakIterator::first,
217 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
218 }
219 
PHP_METHOD(IntlBreakIterator,last)220 U_CFUNC PHP_METHOD(IntlBreakIterator, last)
221 {
222 	_breakiter_no_args_ret_int32(&BreakIterator::last,
223 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
224 }
225 
PHP_METHOD(IntlBreakIterator,previous)226 U_CFUNC PHP_METHOD(IntlBreakIterator, previous)
227 {
228 	_breakiter_no_args_ret_int32(&BreakIterator::previous,
229 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
230 }
231 
PHP_METHOD(IntlBreakIterator,next)232 U_CFUNC PHP_METHOD(IntlBreakIterator, next)
233 {
234 	zval *arg = NULL;
235 
236 	if (ZEND_NUM_ARGS() == 0) {
237 		goto no_arg_version;
238 	}
239 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "z!", &arg) == FAILURE) {
240 		RETURN_THROWS();
241 	}
242 
243 	if (arg == NULL) {
244 		ZEND_NUM_ARGS() = 0; /* pretend we don't have any argument */
245 		no_arg_version:
246 		_breakiter_no_args_ret_int32(&BreakIterator::next,
247 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
248 	} else {
249 		_breakiter_int32_ret_int32(&BreakIterator::next,
250 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
251 	}
252 }
253 
PHP_METHOD(IntlBreakIterator,current)254 U_CFUNC PHP_METHOD(IntlBreakIterator, current)
255 {
256 	BREAKITER_METHOD_INIT_VARS;
257 	object = ZEND_THIS;
258 
259 	if (zend_parse_parameters_none() == FAILURE) {
260 		RETURN_THROWS();
261 	}
262 
263 	BREAKITER_METHOD_FETCH_OBJECT;
264 
265 	int32_t res = bio->biter->current();
266 
267 	RETURN_LONG((zend_long)res);
268 }
269 
PHP_METHOD(IntlBreakIterator,following)270 U_CFUNC PHP_METHOD(IntlBreakIterator, following)
271 {
272 	_breakiter_int32_ret_int32(
273 			&BreakIterator::following,
274 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
275 }
276 
PHP_METHOD(IntlBreakIterator,preceding)277 U_CFUNC PHP_METHOD(IntlBreakIterator, preceding)
278 {
279 	_breakiter_int32_ret_int32(
280 			&BreakIterator::preceding,
281 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
282 }
283 
PHP_METHOD(IntlBreakIterator,isBoundary)284 U_CFUNC PHP_METHOD(IntlBreakIterator, isBoundary)
285 {
286 	zend_long offset;
287 	BREAKITER_METHOD_INIT_VARS;
288 	object = ZEND_THIS;
289 
290 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l",
291 			&offset) == FAILURE) {
292 		RETURN_THROWS();
293 	}
294 
295 	if (UNEXPECTED(offset < INT32_MIN || offset > INT32_MAX)) {
296 		zend_argument_value_error(1, "must be between %d and %d", INT32_MIN, INT32_MAX);
297 		RETURN_THROWS();
298 	}
299 
300 	BREAKITER_METHOD_FETCH_OBJECT;
301 
302 	UBool res = bio->biter->isBoundary((int32_t)offset);
303 
304 	RETURN_BOOL((zend_long)res);
305 }
306 
PHP_METHOD(IntlBreakIterator,getLocale)307 U_CFUNC PHP_METHOD(IntlBreakIterator, getLocale)
308 {
309 	zend_long	locale_type;
310 	BREAKITER_METHOD_INIT_VARS;
311 	object = ZEND_THIS;
312 
313 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &locale_type) == FAILURE) {
314 		RETURN_THROWS();
315 	}
316 
317 	/* Change to ValueError? */
318 	if (locale_type != ULOC_ACTUAL_LOCALE && locale_type != ULOC_VALID_LOCALE) {
319 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
320 			"breakiter_get_locale: invalid locale type", 0);
321 		RETURN_FALSE;
322 	}
323 
324 	BREAKITER_METHOD_FETCH_OBJECT;
325 
326 	Locale locale = bio->biter->getLocale((ULocDataLocaleType)locale_type,
327 		BREAKITER_ERROR_CODE(bio));
328 	INTL_METHOD_CHECK_STATUS(bio,
329 		"breakiter_get_locale: Call to ICU method has failed");
330 
331 	RETURN_STRING(locale.getName());
332 }
333 
PHP_METHOD(IntlBreakIterator,getPartsIterator)334 U_CFUNC PHP_METHOD(IntlBreakIterator, getPartsIterator)
335 {
336 	zend_long key_type = 0;
337 	BREAKITER_METHOD_INIT_VARS;
338 	object = ZEND_THIS;
339 
340 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "|l", &key_type) == FAILURE) {
341 		RETURN_THROWS();
342 	}
343 
344 	if (key_type != PARTS_ITERATOR_KEY_SEQUENTIAL
345 			&& key_type != PARTS_ITERATOR_KEY_LEFT
346 			&& key_type != PARTS_ITERATOR_KEY_RIGHT) {
347 		zend_argument_value_error(1, "must be one of IntlPartsIterator::KEY_SEQUENTIAL, "
348 			"IntlPartsIterator::KEY_LEFT, or IntlPartsIterator::KEY_RIGHT");
349 		RETURN_THROWS();
350 	}
351 
352 	BREAKITER_METHOD_FETCH_OBJECT;
353 
354 	IntlIterator_from_BreakIterator_parts(
355 		object, return_value, (parts_iter_key_type)key_type);
356 }
357 
PHP_METHOD(IntlBreakIterator,getErrorCode)358 U_CFUNC PHP_METHOD(IntlBreakIterator, getErrorCode)
359 {
360 	BREAKITER_METHOD_INIT_VARS;
361 	object = ZEND_THIS;
362 
363 	if (zend_parse_parameters_none() == FAILURE) {
364 		RETURN_THROWS();
365 	}
366 
367 	/* Fetch the object (without resetting its last error code ). */
368 	bio = Z_INTL_BREAKITERATOR_P(object);
369 	RETURN_LONG((zend_long)BREAKITER_ERROR_CODE(bio));
370 }
371 
PHP_METHOD(IntlBreakIterator,getErrorMessage)372 U_CFUNC PHP_METHOD(IntlBreakIterator, getErrorMessage)
373 {
374 	zend_string* message = NULL;
375 	BREAKITER_METHOD_INIT_VARS;
376 	object = ZEND_THIS;
377 
378 	if (zend_parse_parameters_none() == FAILURE) {
379 		RETURN_THROWS();
380 	}
381 
382 
383 	/* Fetch the object (without resetting its last error code ). */
384 	bio = Z_INTL_BREAKITERATOR_P(object);
385 
386 	/* Return last error message. */
387 	message = intl_error_get_message(BREAKITER_ERROR_P(bio));
388 	RETURN_STR(message);
389 }
390 
PHP_METHOD(IntlBreakIterator,getIterator)391 U_CFUNC PHP_METHOD(IntlBreakIterator, getIterator)
392 {
393 	if (zend_parse_parameters_none() == FAILURE) {
394 		return;
395 	}
396 
397 	zend_create_internal_iterator_zval(return_value, ZEND_THIS);
398 }
399