1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include <unicode/brkiter.h>
22 #include <unicode/rbbi.h>
23 #include "codepointiterator_internal.h"
24 
25 #include "breakiterator_iterators.h"
26 
27 #include <typeinfo>
28 
29 extern "C" {
30 #define USE_BREAKITERATOR_POINTER 1
31 #include "breakiterator_class.h"
32 #include "breakiterator_methods.h"
33 #include "rulebasedbreakiterator_methods.h"
34 #include "codepointiterator_methods.h"
35 #include <zend_exceptions.h>
36 #include <zend_interfaces.h>
37 #include <assert.h>
38 }
39 
40 using PHP::CodePointBreakIterator;
41 
42 /* {{{ Global variables */
/* Class entries of the three PHP classes registered by
 * breakiterator_register_BreakIterator_class(): IntlBreakIterator,
 * IntlRuleBasedBreakIterator and IntlCodePointBreakIterator, in that order. */
43 zend_class_entry *BreakIterator_ce_ptr;
44 zend_class_entry *RuleBasedBreakIterator_ce_ptr;
45 zend_class_entry *CodePointBreakIterator_ce_ptr;
/* Object handler table installed on every object made by
 * BreakIterator_object_create(); it is filled in during class registration
 * with the custom compare, clone and debug-info handlers of this file. */
46 zend_object_handlers BreakIterator_handlers;
47 /* }}} */
48 
breakiterator_object_create(zval * object,BreakIterator * biter TSRMLS_DC)49 U_CFUNC	void breakiterator_object_create(zval *object,
50 										 BreakIterator *biter TSRMLS_DC)
51 {
52 	UClassID classId = biter->getDynamicClassID();
53 	zend_class_entry *ce;
54 
55 	if (classId == RuleBasedBreakIterator::getStaticClassID()) {
56 		ce = RuleBasedBreakIterator_ce_ptr;
57 	} else if (classId == CodePointBreakIterator::getStaticClassID()) {
58 		ce = CodePointBreakIterator_ce_ptr;
59 	} else {
60 		ce = BreakIterator_ce_ptr;
61 	}
62 
63 	object_init_ex(object, ce);
64 	breakiterator_object_construct(object, biter TSRMLS_CC);
65 }
66 
breakiterator_object_construct(zval * object,BreakIterator * biter TSRMLS_DC)67 U_CFUNC void breakiterator_object_construct(zval *object,
68 											BreakIterator *biter TSRMLS_DC)
69 {
70 	BreakIterator_object *bio;
71 
72 	BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK; //populate to from object
73 	assert(bio->biter == NULL);
74 	bio->biter = biter;
75 }
76 
77 /* {{{ compare handler for BreakIterator */
BreakIterator_compare_objects(zval * object1,zval * object2 TSRMLS_DC)78 static int BreakIterator_compare_objects(zval *object1,
79 										 zval *object2 TSRMLS_DC)
80 {
81 	BreakIterator_object	*bio1,
82 							*bio2;
83 
84 	bio1 = (BreakIterator_object*)zend_object_store_get_object(object1 TSRMLS_CC);
85 	bio2 = (BreakIterator_object*)zend_object_store_get_object(object2 TSRMLS_CC);
86 
87 	if (bio1->biter == NULL || bio2->biter == NULL) {
88 		return bio1->biter == bio2->biter ? 0 : 1;
89 	}
90 
91 	return *bio1->biter == *bio2->biter ? 0 : 1;
92 }
93 /* }}} */
94 
95 /* {{{ clone handler for BreakIterator */
BreakIterator_clone_obj(zval * object TSRMLS_DC)96 static zend_object_value BreakIterator_clone_obj(zval *object TSRMLS_DC)
97 {
98 	BreakIterator_object	*bio_orig,
99 							*bio_new;
100 	zend_object_value		ret_val;
101 
102 	bio_orig = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
103 	intl_errors_reset(INTL_DATA_ERROR_P(bio_orig) TSRMLS_CC);
104 
105 	ret_val = BreakIterator_ce_ptr->create_object(Z_OBJCE_P(object) TSRMLS_CC);
106 	bio_new  = (BreakIterator_object*)zend_object_store_get_object_by_handle(
107 			ret_val.handle TSRMLS_CC);
108 
109 	zend_objects_clone_members(&bio_new->zo, ret_val,
110 		&bio_orig->zo, Z_OBJ_HANDLE_P(object) TSRMLS_CC);
111 
112 	if (bio_orig->biter != NULL) {
113 		BreakIterator *new_biter;
114 
115 		new_biter = bio_orig->biter->clone();
116 		if (!new_biter) {
117 			char *err_msg;
118 			intl_errors_set_code(BREAKITER_ERROR_P(bio_orig),
119 				U_MEMORY_ALLOCATION_ERROR TSRMLS_CC);
120 			intl_errors_set_custom_msg(BREAKITER_ERROR_P(bio_orig),
121 				"Could not clone BreakIterator", 0 TSRMLS_CC);
122 			err_msg = intl_error_get_message(BREAKITER_ERROR_P(bio_orig) TSRMLS_CC);
123 			zend_throw_exception(NULL, err_msg, 0 TSRMLS_CC);
124 			efree(err_msg);
125 		} else {
126 			bio_new->biter = new_biter;
127 			bio_new->text = bio_orig->text;
128 			if (bio_new->text) {
129 				zval_add_ref(&bio_new->text);
130 			}
131 		}
132 	} else {
133 		zend_throw_exception(NULL, "Cannot clone unconstructed BreakIterator", 0 TSRMLS_CC);
134 	}
135 
136 	return ret_val;
137 }
138 /* }}} */
139 
140 /* {{{ get_debug_info handler for BreakIterator */
BreakIterator_get_debug_info(zval * object,int * is_temp TSRMLS_DC)141 static HashTable *BreakIterator_get_debug_info(zval *object, int *is_temp TSRMLS_DC)
142 {
143 	zval					zv = zval_used_for_init;
144 	BreakIterator_object	*bio;
145 	const BreakIterator		*biter;
146 
147 	*is_temp = 1;
148 
149 	array_init_size(&zv, 8);
150 
151 	bio  = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
152 	biter = bio->biter;
153 
154 	if (biter == NULL) {
155 		add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 0);
156 		return Z_ARRVAL(zv);
157 	}
158 	add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 1);
159 
160 	if (bio->text == NULL) {
161 		add_assoc_null_ex(&zv, "text", sizeof("text"));
162 	} else {
163 		zval_add_ref(&bio->text);
164 		add_assoc_zval_ex(&zv, "text", sizeof("text"), bio->text);
165 	}
166 
167 	add_assoc_string_ex(&zv, "type", sizeof("type"),
168 			const_cast<char*>(typeid(*biter).name()), 1);
169 
170 	return Z_ARRVAL(zv);
171 }
172 /* }}} */
173 
174 /* {{{ void breakiterator_object_init(BreakIterator_object* to)
175  * Initialize internals of BreakIterator_object not specific to zend standard objects.
176  */
breakiterator_object_init(BreakIterator_object * bio TSRMLS_DC)177 static void breakiterator_object_init(BreakIterator_object *bio TSRMLS_DC)
178 {
179 	intl_error_init(BREAKITER_ERROR_P(bio) TSRMLS_CC);
180 	bio->biter = NULL;
181 	bio->text = NULL;
182 }
183 /* }}} */
184 
185 /* {{{ BreakIterator_objects_dtor */
BreakIterator_objects_dtor(void * object,zend_object_handle handle TSRMLS_DC)186 static void BreakIterator_objects_dtor(void *object,
187 									   zend_object_handle handle TSRMLS_DC)
188 {
189 	zend_objects_destroy_object((zend_object*)object, handle TSRMLS_CC);
190 }
191 /* }}} */
192 
193 /* {{{ BreakIterator_objects_free */
BreakIterator_objects_free(zend_object * object TSRMLS_DC)194 static void BreakIterator_objects_free(zend_object *object TSRMLS_DC)
195 {
196 	BreakIterator_object* bio = (BreakIterator_object*) object;
197 
198 	if (bio->text) {
199 		zval_ptr_dtor(&bio->text);
200 	}
201 	if (bio->biter) {
202 		delete bio->biter;
203 		bio->biter = NULL;
204 	}
205 	intl_error_reset(BREAKITER_ERROR_P(bio) TSRMLS_CC);
206 
207 	zend_object_std_dtor(&bio->zo TSRMLS_CC);
208 
209 	efree(bio);
210 }
211 /* }}} */
212 
213 /* {{{ BreakIterator_object_create */
BreakIterator_object_create(zend_class_entry * ce TSRMLS_DC)214 static zend_object_value BreakIterator_object_create(zend_class_entry *ce TSRMLS_DC)
215 {
216 	zend_object_value		retval;
217 	BreakIterator_object*	intern;
218 
219 	intern = (BreakIterator_object*)ecalloc(1, sizeof(BreakIterator_object));
220 
221 	zend_object_std_init(&intern->zo, ce TSRMLS_CC);
222 #if PHP_VERSION_ID < 50399
223     zend_hash_copy(intern->zo.properties, &(ce->default_properties),
224         (copy_ctor_func_t) zval_add_ref, NULL, sizeof(zval*));
225 #else
226     object_properties_init((zend_object*) intern, ce);
227 #endif
228 	breakiterator_object_init(intern TSRMLS_CC);
229 
230 	retval.handle = zend_objects_store_put(
231 		intern,
232 		BreakIterator_objects_dtor,
233 		(zend_objects_free_object_storage_t) BreakIterator_objects_free,
234 		NULL TSRMLS_CC);
235 
236 	retval.handlers = &BreakIterator_handlers;
237 
238 	return retval;
239 }
240 /* }}} */
241 
242 /* {{{ BreakIterator/RuleBasedBreakIterator methods arguments info */
/* Each group below declares one argument-info table that the method tables
 * further down attach to one or more methods. In the Zend API the fourth
 * argument of ZEND_BEGIN_ARG_INFO_EX is the number of required arguments;
 * parameters listed beyond that count are optional. */
243 
/* No declared parameters. */
244 ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_void, 0, 0, 0)
245 ZEND_END_ARG_INFO()
246 
/* Optional `locale`; used by the static create*Instance factories. */
247 ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_locale, 0, 0, 0)
248 	ZEND_ARG_INFO(0, locale)
249 ZEND_END_ARG_INFO()
250 
/* Required `text`; used by setText. */
251 ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_setText, 0, 0, 1)
252 	ZEND_ARG_INFO(0, text)
253 ZEND_END_ARG_INFO()
254 
/* Optional `offset`; used by next. */
255 ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_next, 0, 0, 0)
256 	ZEND_ARG_INFO(0, offset)
257 ZEND_END_ARG_INFO()
258 
/* Required `offset`; used by following, preceding and isBoundary. */
259 ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_offset, 0, 0, 1)
260 	ZEND_ARG_INFO(0, offset)
261 ZEND_END_ARG_INFO()
262 
/* Required `locale_type`; used by getLocale. */
263 ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_get_locale, 0, 0, 1)
264 	ZEND_ARG_INFO(0, locale_type)
265 ZEND_END_ARG_INFO()
266 
/* Optional `key_type`; used by getPartsIterator. */
267 ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_getPartsIterator, 0, 0, 0)
268 	ZEND_ARG_INFO(0, key_type)
269 ZEND_END_ARG_INFO()
270 
/* Required `rules`, optional `areCompiled`; used by the
 * IntlRuleBasedBreakIterator constructor. */
271 ZEND_BEGIN_ARG_INFO_EX(ainfo_rbbi___construct, 0, 0, 1)
272 	ZEND_ARG_INFO(0, rules)
273 	ZEND_ARG_INFO(0, areCompiled)
274 ZEND_END_ARG_INFO()
275 
276 /* }}} */
277 
278 /* {{{ BreakIterator_class_functions
279  * Every 'BreakIterator' class method has an entry in this table
 *
 * This table is passed to INIT_CLASS_ENTRY for "IntlBreakIterator" in
 * breakiterator_register_BreakIterator_class(). Columns: PHP method name,
 * implementing function, argument info, access flags.
280  */
281 static const zend_function_entry BreakIterator_class_functions[] = {
	/* The constructor is private; the static create*Instance entries below
	 * are the public factories declared on this class. */
282 	PHP_ME(BreakIterator,					__construct,							ainfo_biter_void,					ZEND_ACC_PRIVATE)
283 	PHP_ME_MAPPING(createWordInstance,		breakiter_create_word_instance,			ainfo_biter_locale,					ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
284 	PHP_ME_MAPPING(createLineInstance,		breakiter_create_line_instance,			ainfo_biter_locale,					ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
285 	PHP_ME_MAPPING(createCharacterInstance,	breakiter_create_character_instance,	ainfo_biter_locale,					ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
286 	PHP_ME_MAPPING(createSentenceInstance,	breakiter_create_sentence_instance,		ainfo_biter_locale,					ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
287 	PHP_ME_MAPPING(createTitleInstance,		breakiter_create_title_instance,		ainfo_biter_locale,					ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
288 	PHP_ME_MAPPING(createCodePointInstance,	breakiter_create_code_point_instance,	ainfo_biter_void,					ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
	/* Instance methods. */
289 	PHP_ME_MAPPING(getText,					breakiter_get_text,						ainfo_biter_void,					ZEND_ACC_PUBLIC)
290 	PHP_ME_MAPPING(setText,					breakiter_set_text,						ainfo_biter_setText,				ZEND_ACC_PUBLIC)
291 	PHP_ME_MAPPING(first,					breakiter_first,						ainfo_biter_void,					ZEND_ACC_PUBLIC)
292 	PHP_ME_MAPPING(last,					breakiter_last,							ainfo_biter_void,					ZEND_ACC_PUBLIC)
293 	PHP_ME_MAPPING(previous,				breakiter_previous,						ainfo_biter_void,					ZEND_ACC_PUBLIC)
294 	PHP_ME_MAPPING(next,					breakiter_next,							ainfo_biter_next,					ZEND_ACC_PUBLIC)
295 	PHP_ME_MAPPING(current,					breakiter_current,						ainfo_biter_void,					ZEND_ACC_PUBLIC)
296 	PHP_ME_MAPPING(following,				breakiter_following,					ainfo_biter_offset,					ZEND_ACC_PUBLIC)
297 	PHP_ME_MAPPING(preceding,				breakiter_preceding,					ainfo_biter_offset,					ZEND_ACC_PUBLIC)
298 	PHP_ME_MAPPING(isBoundary,				breakiter_is_boundary,					ainfo_biter_offset,					ZEND_ACC_PUBLIC)
299 	PHP_ME_MAPPING(getLocale,				breakiter_get_locale,					ainfo_biter_get_locale,				ZEND_ACC_PUBLIC)
300 	PHP_ME_MAPPING(getPartsIterator,		breakiter_get_parts_iterator,			ainfo_biter_getPartsIterator,		ZEND_ACC_PUBLIC)
301 
	/* Error reporting accessors. */
302 	PHP_ME_MAPPING(getErrorCode,			breakiter_get_error_code,				ainfo_biter_void,					ZEND_ACC_PUBLIC)
303 	PHP_ME_MAPPING(getErrorMessage,			breakiter_get_error_message,			ainfo_biter_void,					ZEND_ACC_PUBLIC)
304 	PHP_FE_END
305 };
306 /* }}} */
307 
308 /* {{{ RuleBasedBreakIterator_class_functions
 * Method table passed to INIT_CLASS_ENTRY for "IntlRuleBasedBreakIterator",
 * which is registered as a subclass of the base break iterator class.
 * Unlike the base class, its constructor is public.
309  */
310 static const zend_function_entry RuleBasedBreakIterator_class_functions[] = {
311 	PHP_ME(IntlRuleBasedBreakIterator,		__construct,							ainfo_rbbi___construct,				ZEND_ACC_PUBLIC)
312 	PHP_ME_MAPPING(getRules,				rbbi_get_rules,							ainfo_biter_void,					ZEND_ACC_PUBLIC)
313 	PHP_ME_MAPPING(getRuleStatus,			rbbi_get_rule_status,					ainfo_biter_void,					ZEND_ACC_PUBLIC)
314 	PHP_ME_MAPPING(getRuleStatusVec,		rbbi_get_rule_status_vec,				ainfo_biter_void,					ZEND_ACC_PUBLIC)
	/* getBinaryRules is only registered when building against ICU 4.8 or
	 * newer (major * 10 + minor >= 48). */
315 #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
316 	PHP_ME_MAPPING(getBinaryRules,			rbbi_get_binary_rules,					ainfo_biter_void,					ZEND_ACC_PUBLIC)
317 #endif
318 	PHP_FE_END
319 };
320 /* }}} */
321 
322 /* {{{ CodePointBreakIterator_class_functions
 * Method table passed to INIT_CLASS_ENTRY for "IntlCodePointBreakIterator",
 * which is registered as a subclass of the base break iterator class. It
 * adds a single method and declares no constructor of its own.
323  */
324 static const zend_function_entry CodePointBreakIterator_class_functions[] = {
325 	PHP_ME_MAPPING(getLastCodePoint,		cpbi_get_last_code_point,				ainfo_biter_void,					ZEND_ACC_PUBLIC)
326 	PHP_FE_END
327 };
328 /* }}} */
329 
330 
331 /* {{{ breakiterator_register_BreakIterator_class
332  * Initialize 'BreakIterator' class
333  */
breakiterator_register_BreakIterator_class(TSRMLS_D)334 U_CFUNC void breakiterator_register_BreakIterator_class(TSRMLS_D)
335 {
336 	zend_class_entry ce;
337 
338 	/* Create and register 'BreakIterator' class. */
339 	INIT_CLASS_ENTRY(ce, "IntlBreakIterator", BreakIterator_class_functions);
340 	ce.create_object = BreakIterator_object_create;
341 	ce.get_iterator = _breakiterator_get_iterator;
342 	BreakIterator_ce_ptr = zend_register_internal_class(&ce TSRMLS_CC);
343 
344 	memcpy(&BreakIterator_handlers, zend_get_std_object_handlers(),
345 		sizeof BreakIterator_handlers);
346 	BreakIterator_handlers.compare_objects = BreakIterator_compare_objects;
347 	BreakIterator_handlers.clone_obj = BreakIterator_clone_obj;
348 	BreakIterator_handlers.get_debug_info = BreakIterator_get_debug_info;
349 
350 	zend_class_implements(BreakIterator_ce_ptr TSRMLS_CC, 1,
351 			zend_ce_traversable);
352 
353 	zend_declare_class_constant_long(BreakIterator_ce_ptr,
354 		"DONE", sizeof("DONE") - 1, BreakIterator::DONE TSRMLS_CC );
355 
356 	/* Declare constants that are defined in the C header */
357 #define BREAKITER_DECL_LONG_CONST(name) \
358 	zend_declare_class_constant_long(BreakIterator_ce_ptr, #name, \
359 		sizeof(#name) - 1, UBRK_ ## name TSRMLS_CC)
360 
361 	BREAKITER_DECL_LONG_CONST(WORD_NONE);
362 	BREAKITER_DECL_LONG_CONST(WORD_NONE_LIMIT);
363 	BREAKITER_DECL_LONG_CONST(WORD_NUMBER);
364 	BREAKITER_DECL_LONG_CONST(WORD_NUMBER_LIMIT);
365 	BREAKITER_DECL_LONG_CONST(WORD_LETTER);
366 	BREAKITER_DECL_LONG_CONST(WORD_LETTER_LIMIT);
367 	BREAKITER_DECL_LONG_CONST(WORD_KANA);
368 	BREAKITER_DECL_LONG_CONST(WORD_KANA_LIMIT);
369 	BREAKITER_DECL_LONG_CONST(WORD_IDEO);
370 	BREAKITER_DECL_LONG_CONST(WORD_IDEO_LIMIT);
371 
372 	BREAKITER_DECL_LONG_CONST(LINE_SOFT);
373 	BREAKITER_DECL_LONG_CONST(LINE_SOFT_LIMIT);
374 	BREAKITER_DECL_LONG_CONST(LINE_HARD);
375 	BREAKITER_DECL_LONG_CONST(LINE_HARD_LIMIT);
376 
377 	BREAKITER_DECL_LONG_CONST(SENTENCE_TERM);
378 	BREAKITER_DECL_LONG_CONST(SENTENCE_TERM_LIMIT);
379 	BREAKITER_DECL_LONG_CONST(SENTENCE_SEP);
380 	BREAKITER_DECL_LONG_CONST(SENTENCE_SEP_LIMIT);
381 
382 #undef BREAKITER_DECL_LONG_CONST
383 
384 
385 	/* Create and register 'RuleBasedBreakIterator' class. */
386 	INIT_CLASS_ENTRY(ce, "IntlRuleBasedBreakIterator",
387 			RuleBasedBreakIterator_class_functions);
388 	RuleBasedBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
389 			BreakIterator_ce_ptr, NULL TSRMLS_CC);
390 
391 	/* Create and register 'CodePointBreakIterator' class. */
392 	INIT_CLASS_ENTRY(ce, "IntlCodePointBreakIterator",
393 			CodePointBreakIterator_class_functions);
394 	CodePointBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
395 			BreakIterator_ce_ptr, NULL TSRMLS_CC);
396 }
397 /* }}} */
398