1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13 */
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <unicode/brkiter.h>
20 #include "codepointiterator_internal.h"
21 
22 #include "breakiterator_iterators.h"
23 
24 extern "C" {
25 #include "../php_intl.h"
26 #define USE_BREAKITERATOR_POINTER 1
27 #include "breakiterator_class.h"
28 #include "../locale/locale.h"
29 #include <zend_exceptions.h>
30 #include <zend_interfaces.h>
31 }
32 
33 using PHP::CodePointBreakIterator;
34 using icu::BreakIterator;
35 using icu::Locale;
36 
PHP_METHOD(IntlBreakIterator,__construct)37 U_CFUNC PHP_METHOD(IntlBreakIterator, __construct)
38 {
39 	zend_throw_exception( NULL,
40 		"An object of this type cannot be created with the new operator",
41 		0 );
42 }
43 
_breakiter_factory(const char * func_name,BreakIterator * (* func)(const Locale &,UErrorCode &),INTERNAL_FUNCTION_PARAMETERS)44 static void _breakiter_factory(const char *func_name,
45 							   BreakIterator *(*func)(const Locale&, UErrorCode&),
46 							   INTERNAL_FUNCTION_PARAMETERS)
47 {
48 	BreakIterator	*biter;
49 	char		                *locale_str = NULL;
50 	size_t				dummy;
51 	char			*msg;
52 	UErrorCode		status = UErrorCode();
53 	intl_error_reset(NULL);
54 
55 	ZEND_PARSE_PARAMETERS_START(0, 1)
56 		Z_PARAM_OPTIONAL
57 		Z_PARAM_STRING_OR_NULL(locale_str, dummy)
58 	ZEND_PARSE_PARAMETERS_END();
59 
60 	if (locale_str == NULL) {
61 		locale_str = (char *)intl_locale_get_default();
62 	}
63 
64 	biter = func(Locale::createFromName(locale_str), status);
65 	intl_error_set_code(NULL, status);
66 	if (U_FAILURE(status)) {
67 		spprintf(&msg, 0, "%s: error creating BreakIterator",
68 				func_name);
69 		intl_error_set_custom_msg(NULL, msg, 1);
70 		efree(msg);
71 		RETURN_NULL();
72 	}
73 
74 	breakiterator_object_create(return_value, biter, 1);
75 }
76 
PHP_METHOD(IntlBreakIterator,createWordInstance)77 U_CFUNC PHP_METHOD(IntlBreakIterator, createWordInstance)
78 {
79 	_breakiter_factory("breakiter_create_word_instance",
80 			&BreakIterator::createWordInstance,
81 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
82 }
83 
PHP_METHOD(IntlBreakIterator,createLineInstance)84 U_CFUNC PHP_METHOD(IntlBreakIterator, createLineInstance)
85 {
86 	_breakiter_factory("breakiter_create_line_instance",
87 			&BreakIterator::createLineInstance,
88 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
89 }
90 
PHP_METHOD(IntlBreakIterator,createCharacterInstance)91 U_CFUNC PHP_METHOD(IntlBreakIterator, createCharacterInstance)
92 {
93 	_breakiter_factory("breakiter_create_character_instance",
94 			&BreakIterator::createCharacterInstance,
95 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
96 }
97 
PHP_METHOD(IntlBreakIterator,createSentenceInstance)98 U_CFUNC PHP_METHOD(IntlBreakIterator, createSentenceInstance)
99 {
100 	_breakiter_factory("breakiter_create_sentence_instance",
101 			&BreakIterator::createSentenceInstance,
102 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
103 }
104 
PHP_METHOD(IntlBreakIterator,createTitleInstance)105 U_CFUNC PHP_METHOD(IntlBreakIterator, createTitleInstance)
106 {
107 	_breakiter_factory("breakiter_create_title_instance",
108 			&BreakIterator::createTitleInstance,
109 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
110 }
111 
PHP_METHOD(IntlBreakIterator,createCodePointInstance)112 U_CFUNC PHP_METHOD(IntlBreakIterator, createCodePointInstance)
113 {
114 	intl_error_reset(NULL);
115 
116 	ZEND_PARSE_PARAMETERS_NONE();
117 
118 	CodePointBreakIterator *cpbi = new CodePointBreakIterator();
119 	breakiterator_object_create(return_value, cpbi, 1);
120 }
121 
PHP_METHOD(IntlBreakIterator,getText)122 U_CFUNC PHP_METHOD(IntlBreakIterator, getText)
123 {
124 	BREAKITER_METHOD_INIT_VARS;
125 	object = ZEND_THIS;
126 
127 	ZEND_PARSE_PARAMETERS_NONE();
128 
129 	BREAKITER_METHOD_FETCH_OBJECT;
130 
131 	if (Z_ISUNDEF(bio->text)) {
132 		RETURN_NULL();
133 	} else {
134 		ZVAL_COPY(return_value, &bio->text);
135 	}
136 }
137 
PHP_METHOD(IntlBreakIterator,setText)138 U_CFUNC PHP_METHOD(IntlBreakIterator, setText)
139 {
140 	UText	*ut = NULL;
141 	zend_string	*text;
142 	BREAKITER_METHOD_INIT_VARS;
143 	object = ZEND_THIS;
144 
145 	ZEND_PARSE_PARAMETERS_START(1, 1)
146 		Z_PARAM_STR(text)
147 	ZEND_PARSE_PARAMETERS_END();
148 
149 	BREAKITER_METHOD_FETCH_OBJECT;
150 
151 	ut = utext_openUTF8(ut, ZSTR_VAL(text), ZSTR_LEN(text), BREAKITER_ERROR_CODE_P(bio));
152 	INTL_METHOD_CHECK_STATUS(bio, "breakiter_set_text: error opening UText");
153 
154 	bio->biter->setText(ut, BREAKITER_ERROR_CODE(bio));
155 	utext_close(ut); /* ICU shallow clones the UText */
156 	INTL_METHOD_CHECK_STATUS(bio, "breakiter_set_text: error calling "
157 		"BreakIterator::setText()");
158 
159 	/* When ICU clones the UText, it does not copy the buffer, so we have to
160 	 * keep the string buffer around by holding a reference to its zval. This
161 	 * also allows a faste implementation of getText() */
162 	zval_ptr_dtor(&bio->text);
163 	ZVAL_STR_COPY(&bio->text, text);
164 
165 	RETURN_TRUE;
166 }
167 
_breakiter_no_args_ret_int32(int32_t (BreakIterator::* func)(),INTERNAL_FUNCTION_PARAMETERS)168 static void _breakiter_no_args_ret_int32(
169 		int32_t (BreakIterator::*func)(),
170 		INTERNAL_FUNCTION_PARAMETERS)
171 {
172 	BREAKITER_METHOD_INIT_VARS;
173 	object = ZEND_THIS;
174 
175 	ZEND_PARSE_PARAMETERS_NONE();
176 
177 	BREAKITER_METHOD_FETCH_OBJECT;
178 
179 	int32_t res = (bio->biter->*func)();
180 
181 	RETURN_LONG((zend_long)res);
182 }
183 
_breakiter_int32_ret_int32(int32_t (BreakIterator::* func)(int32_t),INTERNAL_FUNCTION_PARAMETERS)184 static void _breakiter_int32_ret_int32(
185 		int32_t (BreakIterator::*func)(int32_t),
186 		INTERNAL_FUNCTION_PARAMETERS)
187 {
188 	zend_long	arg;
189 	BREAKITER_METHOD_INIT_VARS;
190 	object = ZEND_THIS;
191 
192 	ZEND_PARSE_PARAMETERS_START(1, 1)
193 		Z_PARAM_LONG(arg)
194 	ZEND_PARSE_PARAMETERS_END();
195 
196 	BREAKITER_METHOD_FETCH_OBJECT;
197 
198 	if (UNEXPECTED(arg < INT32_MIN || arg > INT32_MAX)) {
199 		zend_argument_value_error(1, "must be between %d and %d", INT32_MIN, INT32_MAX);
200 		RETURN_THROWS();
201 	}
202 
203 	int32_t res = (bio->biter->*func)((int32_t)arg);
204 
205 	RETURN_LONG((zend_long)res);
206 }
207 
PHP_METHOD(IntlBreakIterator,first)208 U_CFUNC PHP_METHOD(IntlBreakIterator, first)
209 {
210 	_breakiter_no_args_ret_int32(&BreakIterator::first,
211 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
212 }
213 
PHP_METHOD(IntlBreakIterator,last)214 U_CFUNC PHP_METHOD(IntlBreakIterator, last)
215 {
216 	_breakiter_no_args_ret_int32(&BreakIterator::last,
217 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
218 }
219 
PHP_METHOD(IntlBreakIterator,previous)220 U_CFUNC PHP_METHOD(IntlBreakIterator, previous)
221 {
222 	_breakiter_no_args_ret_int32(&BreakIterator::previous,
223 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
224 }
225 
PHP_METHOD(IntlBreakIterator,next)226 U_CFUNC PHP_METHOD(IntlBreakIterator, next)
227 {
228 	zval *arg = NULL;
229 
230 	ZEND_PARSE_PARAMETERS_START(0, 1)
231 		Z_PARAM_OPTIONAL
232 		Z_PARAM_ZVAL_OR_NULL(arg)
233 	ZEND_PARSE_PARAMETERS_END();
234 
235 	if (arg == NULL) {
236 		ZEND_NUM_ARGS() = 0;
237 		_breakiter_no_args_ret_int32(&BreakIterator::next,
238 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
239 	} else {
240 		_breakiter_int32_ret_int32(&BreakIterator::next,
241 				INTERNAL_FUNCTION_PARAM_PASSTHRU);
242 	}
243 }
244 
PHP_METHOD(IntlBreakIterator,current)245 U_CFUNC PHP_METHOD(IntlBreakIterator, current)
246 {
247 	BREAKITER_METHOD_INIT_VARS;
248 	object = ZEND_THIS;
249 
250 	ZEND_PARSE_PARAMETERS_NONE();
251 
252 	BREAKITER_METHOD_FETCH_OBJECT;
253 
254 	int32_t res = bio->biter->current();
255 
256 	RETURN_LONG((zend_long)res);
257 }
258 
PHP_METHOD(IntlBreakIterator,following)259 U_CFUNC PHP_METHOD(IntlBreakIterator, following)
260 {
261 	_breakiter_int32_ret_int32(
262 			&BreakIterator::following,
263 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
264 }
265 
PHP_METHOD(IntlBreakIterator,preceding)266 U_CFUNC PHP_METHOD(IntlBreakIterator, preceding)
267 {
268 	_breakiter_int32_ret_int32(
269 			&BreakIterator::preceding,
270 			INTERNAL_FUNCTION_PARAM_PASSTHRU);
271 }
272 
PHP_METHOD(IntlBreakIterator,isBoundary)273 U_CFUNC PHP_METHOD(IntlBreakIterator, isBoundary)
274 {
275 	zend_long offset;
276 	BREAKITER_METHOD_INIT_VARS;
277 	object = ZEND_THIS;
278 
279 	ZEND_PARSE_PARAMETERS_START(1, 1)
280 		Z_PARAM_LONG(offset)
281 	ZEND_PARSE_PARAMETERS_END();
282 
283 	if (UNEXPECTED(offset < INT32_MIN || offset > INT32_MAX)) {
284 		zend_argument_value_error(1, "must be between %d and %d", INT32_MIN, INT32_MAX);
285 		RETURN_THROWS();
286 	}
287 
288 	BREAKITER_METHOD_FETCH_OBJECT;
289 
290 	UBool res = bio->biter->isBoundary((int32_t)offset);
291 
292 	RETURN_BOOL((zend_long)res);
293 }
294 
PHP_METHOD(IntlBreakIterator,getLocale)295 U_CFUNC PHP_METHOD(IntlBreakIterator, getLocale)
296 {
297 	zend_long	locale_type;
298 	BREAKITER_METHOD_INIT_VARS;
299 	object = ZEND_THIS;
300 
301 	ZEND_PARSE_PARAMETERS_START(1, 1)
302 		Z_PARAM_LONG(locale_type)
303 	ZEND_PARSE_PARAMETERS_END();
304 
305 	/* Change to ValueError? */
306 	if (locale_type != ULOC_ACTUAL_LOCALE && locale_type != ULOC_VALID_LOCALE) {
307 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
308 			"breakiter_get_locale: invalid locale type", 0);
309 		RETURN_FALSE;
310 	}
311 
312 	BREAKITER_METHOD_FETCH_OBJECT;
313 
314 	Locale locale = bio->biter->getLocale((ULocDataLocaleType)locale_type,
315 		BREAKITER_ERROR_CODE(bio));
316 	INTL_METHOD_CHECK_STATUS(bio,
317 		"breakiter_get_locale: Call to ICU method has failed");
318 
319 	RETURN_STRING(locale.getName());
320 }
321 
PHP_METHOD(IntlBreakIterator,getPartsIterator)322 U_CFUNC PHP_METHOD(IntlBreakIterator, getPartsIterator)
323 {
324 	zend_long key_type = 0;
325 	BREAKITER_METHOD_INIT_VARS;
326 	object = ZEND_THIS;
327 
328 	ZEND_PARSE_PARAMETERS_START(0, 1)
329 		Z_PARAM_OPTIONAL
330 		Z_PARAM_LONG(key_type)
331 	ZEND_PARSE_PARAMETERS_END();
332 
333 	if (key_type != PARTS_ITERATOR_KEY_SEQUENTIAL
334 			&& key_type != PARTS_ITERATOR_KEY_LEFT
335 			&& key_type != PARTS_ITERATOR_KEY_RIGHT) {
336 		zend_argument_value_error(1, "must be one of IntlPartsIterator::KEY_SEQUENTIAL, "
337 			"IntlPartsIterator::KEY_LEFT, or IntlPartsIterator::KEY_RIGHT");
338 		RETURN_THROWS();
339 	}
340 
341 	BREAKITER_METHOD_FETCH_OBJECT;
342 
343 	IntlIterator_from_BreakIterator_parts(
344 		object, return_value, (parts_iter_key_type)key_type);
345 }
346 
PHP_METHOD(IntlBreakIterator,getErrorCode)347 U_CFUNC PHP_METHOD(IntlBreakIterator, getErrorCode)
348 {
349 	BREAKITER_METHOD_INIT_VARS;
350 	object = ZEND_THIS;
351 
352 	ZEND_PARSE_PARAMETERS_NONE();
353 
354 	/* Fetch the object (without resetting its last error code ). */
355 	bio = Z_INTL_BREAKITERATOR_P(object);
356 	RETURN_LONG((zend_long)BREAKITER_ERROR_CODE(bio));
357 }
358 
PHP_METHOD(IntlBreakIterator,getErrorMessage)359 U_CFUNC PHP_METHOD(IntlBreakIterator, getErrorMessage)
360 {
361 	zend_string* message = NULL;
362 	BREAKITER_METHOD_INIT_VARS;
363 	object = ZEND_THIS;
364 
365 	ZEND_PARSE_PARAMETERS_NONE();
366 
367 	/* Fetch the object (without resetting its last error code ). */
368 	bio = Z_INTL_BREAKITERATOR_P(object);
369 
370 	/* Return last error message. */
371 	message = intl_error_get_message(BREAKITER_ERROR_P(bio));
372 	RETURN_STR(message);
373 }
374 
PHP_METHOD(IntlBreakIterator,getIterator)375 U_CFUNC PHP_METHOD(IntlBreakIterator, getIterator)
376 {
377 	ZEND_PARSE_PARAMETERS_NONE();
378 
379 	zend_create_internal_iterator_zval(return_value, ZEND_THIS);
380 }
381