1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Gustavo Lopes <cataphract@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include <unicode/brkiter.h>
22 #include <unicode/rbbi.h>
23 #include "codepointiterator_internal.h"
24
25 #include "breakiterator_iterators.h"
26
27 #include <typeinfo>
28
29 extern "C" {
30 #define USE_BREAKITERATOR_POINTER 1
31 #include "breakiterator_class.h"
32 #include "breakiterator_methods.h"
33 #include "rulebasedbreakiterator_methods.h"
34 #include "codepointiterator_methods.h"
35 #include <zend_exceptions.h>
36 #include <zend_interfaces.h>
37 #include <assert.h>
38 }
39
40 using PHP::CodePointBreakIterator;
41
/* {{{ Global variables */
/* Class entries; each one is assigned in
 * breakiterator_register_BreakIterator_class(). */
zend_class_entry *BreakIterator_ce_ptr;           /* "IntlBreakIterator" */
zend_class_entry *RuleBasedBreakIterator_ce_ptr;  /* "IntlRuleBasedBreakIterator" */
zend_class_entry *CodePointBreakIterator_ce_ptr;  /* "IntlCodePointBreakIterator" */
/* Handler table installed on every object produced by
 * BreakIterator_object_create(); filled in at class registration. */
zend_object_handlers BreakIterator_handlers;
/* }}} */
48
breakiterator_object_create(zval * object,BreakIterator * biter TSRMLS_DC)49 U_CFUNC void breakiterator_object_create(zval *object,
50 BreakIterator *biter TSRMLS_DC)
51 {
52 UClassID classId = biter->getDynamicClassID();
53 zend_class_entry *ce;
54
55 if (classId == RuleBasedBreakIterator::getStaticClassID()) {
56 ce = RuleBasedBreakIterator_ce_ptr;
57 } else if (classId == CodePointBreakIterator::getStaticClassID()) {
58 ce = CodePointBreakIterator_ce_ptr;
59 } else {
60 ce = BreakIterator_ce_ptr;
61 }
62
63 object_init_ex(object, ce);
64 breakiterator_object_construct(object, biter TSRMLS_CC);
65 }
66
/* Attaches the native iterator `biter` to an already initialized PHP object.
 *
 * The object must not hold an iterator yet; this is only checked through
 * assert(), so the check disappears when NDEBUG is defined.
 */
U_CFUNC void breakiterator_object_construct(zval *object,
											BreakIterator *biter TSRMLS_DC)
{
	BreakIterator_object *bio;

	/* NOTE(review): the macro is defined in the project headers, not here;
	 * it presumably fills the local `bio` from the local `object`, so both
	 * names must stay as they are -- confirm against breakiterator_class.h */
	BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK;
	assert(bio->biter == NULL);
	bio->biter = biter;
}
76
77 /* {{{ compare handler for BreakIterator */
BreakIterator_compare_objects(zval * object1,zval * object2 TSRMLS_DC)78 static int BreakIterator_compare_objects(zval *object1,
79 zval *object2 TSRMLS_DC)
80 {
81 BreakIterator_object *bio1,
82 *bio2;
83
84 bio1 = (BreakIterator_object*)zend_object_store_get_object(object1 TSRMLS_CC);
85 bio2 = (BreakIterator_object*)zend_object_store_get_object(object2 TSRMLS_CC);
86
87 if (bio1->biter == NULL || bio2->biter == NULL) {
88 return bio1->biter == bio2->biter ? 0 : 1;
89 }
90
91 return *bio1->biter == *bio2->biter ? 0 : 1;
92 }
93 /* }}} */
94
95 /* {{{ clone handler for BreakIterator */
BreakIterator_clone_obj(zval * object TSRMLS_DC)96 static zend_object_value BreakIterator_clone_obj(zval *object TSRMLS_DC)
97 {
98 BreakIterator_object *bio_orig,
99 *bio_new;
100 zend_object_value ret_val;
101
102 bio_orig = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
103 intl_errors_reset(INTL_DATA_ERROR_P(bio_orig) TSRMLS_CC);
104
105 ret_val = BreakIterator_ce_ptr->create_object(Z_OBJCE_P(object) TSRMLS_CC);
106 bio_new = (BreakIterator_object*)zend_object_store_get_object_by_handle(
107 ret_val.handle TSRMLS_CC);
108
109 zend_objects_clone_members(&bio_new->zo, ret_val,
110 &bio_orig->zo, Z_OBJ_HANDLE_P(object) TSRMLS_CC);
111
112 if (bio_orig->biter != NULL) {
113 BreakIterator *new_biter;
114
115 new_biter = bio_orig->biter->clone();
116 if (!new_biter) {
117 char *err_msg;
118 intl_errors_set_code(BREAKITER_ERROR_P(bio_orig),
119 U_MEMORY_ALLOCATION_ERROR TSRMLS_CC);
120 intl_errors_set_custom_msg(BREAKITER_ERROR_P(bio_orig),
121 "Could not clone BreakIterator", 0 TSRMLS_CC);
122 err_msg = intl_error_get_message(BREAKITER_ERROR_P(bio_orig) TSRMLS_CC);
123 zend_throw_exception(NULL, err_msg, 0 TSRMLS_CC);
124 efree(err_msg);
125 } else {
126 bio_new->biter = new_biter;
127 bio_new->text = bio_orig->text;
128 if (bio_new->text) {
129 zval_add_ref(&bio_new->text);
130 }
131 }
132 } else {
133 zend_throw_exception(NULL, "Cannot clone unconstructed BreakIterator", 0 TSRMLS_CC);
134 }
135
136 return ret_val;
137 }
138 /* }}} */
139
140 /* {{{ get_debug_info handler for BreakIterator */
BreakIterator_get_debug_info(zval * object,int * is_temp TSRMLS_DC)141 static HashTable *BreakIterator_get_debug_info(zval *object, int *is_temp TSRMLS_DC)
142 {
143 zval zv = zval_used_for_init;
144 BreakIterator_object *bio;
145 const BreakIterator *biter;
146
147 *is_temp = 1;
148
149 array_init_size(&zv, 8);
150
151 bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
152 biter = bio->biter;
153
154 if (biter == NULL) {
155 add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 0);
156 return Z_ARRVAL(zv);
157 }
158 add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 1);
159
160 if (bio->text == NULL) {
161 add_assoc_null_ex(&zv, "text", sizeof("text"));
162 } else {
163 zval_add_ref(&bio->text);
164 add_assoc_zval_ex(&zv, "text", sizeof("text"), bio->text);
165 }
166
167 add_assoc_string_ex(&zv, "type", sizeof("type"),
168 const_cast<char*>(typeid(*biter).name()), 1);
169
170 return Z_ARRVAL(zv);
171 }
172 /* }}} */
173
174 /* {{{ void breakiterator_object_init(BreakIterator_object* to)
175 * Initialize internals of BreakIterator_object not specific to zend standard objects.
176 */
breakiterator_object_init(BreakIterator_object * bio TSRMLS_DC)177 static void breakiterator_object_init(BreakIterator_object *bio TSRMLS_DC)
178 {
179 intl_error_init(BREAKITER_ERROR_P(bio) TSRMLS_CC);
180 bio->biter = NULL;
181 bio->text = NULL;
182 }
183 /* }}} */
184
185 /* {{{ BreakIterator_objects_dtor */
BreakIterator_objects_dtor(void * object,zend_object_handle handle TSRMLS_DC)186 static void BreakIterator_objects_dtor(void *object,
187 zend_object_handle handle TSRMLS_DC)
188 {
189 zend_objects_destroy_object((zend_object*)object, handle TSRMLS_CC);
190 }
191 /* }}} */
192
193 /* {{{ BreakIterator_objects_free */
BreakIterator_objects_free(zend_object * object TSRMLS_DC)194 static void BreakIterator_objects_free(zend_object *object TSRMLS_DC)
195 {
196 BreakIterator_object* bio = (BreakIterator_object*) object;
197
198 if (bio->text) {
199 zval_ptr_dtor(&bio->text);
200 }
201 if (bio->biter) {
202 delete bio->biter;
203 bio->biter = NULL;
204 }
205 intl_error_reset(BREAKITER_ERROR_P(bio) TSRMLS_CC);
206
207 zend_object_std_dtor(&bio->zo TSRMLS_CC);
208
209 efree(bio);
210 }
211 /* }}} */
212
213 /* {{{ BreakIterator_object_create */
BreakIterator_object_create(zend_class_entry * ce TSRMLS_DC)214 static zend_object_value BreakIterator_object_create(zend_class_entry *ce TSRMLS_DC)
215 {
216 zend_object_value retval;
217 BreakIterator_object* intern;
218
219 intern = (BreakIterator_object*)ecalloc(1, sizeof(BreakIterator_object));
220
221 zend_object_std_init(&intern->zo, ce TSRMLS_CC);
222 #if PHP_VERSION_ID < 50399
223 zend_hash_copy(intern->zo.properties, &(ce->default_properties),
224 (copy_ctor_func_t) zval_add_ref, NULL, sizeof(zval*));
225 #else
226 object_properties_init((zend_object*) intern, ce);
227 #endif
228 breakiterator_object_init(intern TSRMLS_CC);
229
230 retval.handle = zend_objects_store_put(
231 intern,
232 BreakIterator_objects_dtor,
233 (zend_objects_free_object_storage_t) BreakIterator_objects_free,
234 NULL TSRMLS_CC);
235
236 retval.handlers = &BreakIterator_handlers;
237
238 return retval;
239 }
240 /* }}} */
241
/* {{{ BreakIterator/RuleBasedBreakIterator methods arguments info
 * The last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
 * arguments; every ZEND_ARG_INFO line declares one by-value parameter.
 */

/* Methods that take no arguments at all. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_void, 0, 0, 0)
ZEND_END_ARG_INFO()

/* Static create*Instance factories: optional locale. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_locale, 0, 0, 0)
	ZEND_ARG_INFO(0, locale)
ZEND_END_ARG_INFO()

/* setText(): required text. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_setText, 0, 0, 1)
	ZEND_ARG_INFO(0, text)
ZEND_END_ARG_INFO()

/* next(): optional offset. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_next, 0, 0, 0)
	ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* following(), preceding(), isBoundary(): required offset. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_offset, 0, 0, 1)
	ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* getLocale(): required locale type. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_get_locale, 0, 0, 1)
	ZEND_ARG_INFO(0, locale_type)
ZEND_END_ARG_INFO()

/* getPartsIterator(): optional key type. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_getPartsIterator, 0, 0, 0)
	ZEND_ARG_INFO(0, key_type)
ZEND_END_ARG_INFO()

/* IntlRuleBasedBreakIterator::__construct(): required rules, optional
 * areCompiled flag. */
ZEND_BEGIN_ARG_INFO_EX(ainfo_rbbi___construct, 0, 0, 1)
	ZEND_ARG_INFO(0, rules)
	ZEND_ARG_INFO(0, areCompiled)
ZEND_END_ARG_INFO()

/* }}} */
277
/* {{{ BreakIterator_class_functions
 * Every 'BreakIterator' class method has an entry in this table.
 * PHP_ME_MAPPING(method, function, arginfo, flags) exposes the C function
 * `function` as the class method `method`.
 */
static const zend_function_entry BreakIterator_class_functions[] = {
	/* The constructor is private; the public static create*Instance entries
	 * below are the factories. */
	PHP_ME(BreakIterator,					__construct,							ainfo_biter_void,				ZEND_ACC_PRIVATE)
	PHP_ME_MAPPING(createWordInstance,		breakiter_create_word_instance,			ainfo_biter_locale,				ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(createLineInstance,		breakiter_create_line_instance,			ainfo_biter_locale,				ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(createCharacterInstance,	breakiter_create_character_instance,	ainfo_biter_locale,				ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(createSentenceInstance,	breakiter_create_sentence_instance,		ainfo_biter_locale,				ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(createTitleInstance,		breakiter_create_title_instance,		ainfo_biter_locale,				ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(createCodePointInstance,	breakiter_create_code_point_instance,	ainfo_biter_void,				ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
	/* Instance methods */
	PHP_ME_MAPPING(getText,					breakiter_get_text,						ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(setText,					breakiter_set_text,						ainfo_biter_setText,			ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(first,					breakiter_first,						ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(last,					breakiter_last,							ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(previous,				breakiter_previous,						ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(next,					breakiter_next,							ainfo_biter_next,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(current,					breakiter_current,						ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(following,				breakiter_following,					ainfo_biter_offset,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(preceding,				breakiter_preceding,					ainfo_biter_offset,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(isBoundary,				breakiter_is_boundary,					ainfo_biter_offset,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(getLocale,				breakiter_get_locale,					ainfo_biter_get_locale,			ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(getPartsIterator,		breakiter_get_parts_iterator,			ainfo_biter_getPartsIterator,	ZEND_ACC_PUBLIC)

	/* Error reporting */
	PHP_ME_MAPPING(getErrorCode,			breakiter_get_error_code,				ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(getErrorMessage,			breakiter_get_error_message,			ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_FE_END
};
/* }}} */
307
/* {{{ RuleBasedBreakIterator_class_functions
 * Methods specific to 'IntlRuleBasedBreakIterator'. Unlike the base class,
 * its constructor is public.
 */
static const zend_function_entry RuleBasedBreakIterator_class_functions[] = {
	PHP_ME(IntlRuleBasedBreakIterator,		__construct,							ainfo_rbbi___construct,			ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(getRules,				rbbi_get_rules,							ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(getRuleStatus,			rbbi_get_rule_status,					ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_ME_MAPPING(getRuleStatusVec,		rbbi_get_rule_status_vec,				ainfo_biter_void,				ZEND_ACC_PUBLIC)
/* getBinaryRules() is only registered when building against ICU 4.8 or newer */
#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
	PHP_ME_MAPPING(getBinaryRules,			rbbi_get_binary_rules,					ainfo_biter_void,				ZEND_ACC_PUBLIC)
#endif
	PHP_FE_END
};
/* }}} */
321
/* {{{ CodePointBreakIterator_class_functions
 * Methods specific to 'IntlCodePointBreakIterator'. No constructor is
 * declared in this table.
 */
static const zend_function_entry CodePointBreakIterator_class_functions[] = {
	PHP_ME_MAPPING(getLastCodePoint,		cpbi_get_last_code_point,				ainfo_biter_void,				ZEND_ACC_PUBLIC)
	PHP_FE_END
};
/* }}} */
329
330
331 /* {{{ breakiterator_register_BreakIterator_class
332 * Initialize 'BreakIterator' class
333 */
breakiterator_register_BreakIterator_class(TSRMLS_D)334 U_CFUNC void breakiterator_register_BreakIterator_class(TSRMLS_D)
335 {
336 zend_class_entry ce;
337
338 /* Create and register 'BreakIterator' class. */
339 INIT_CLASS_ENTRY(ce, "IntlBreakIterator", BreakIterator_class_functions);
340 ce.create_object = BreakIterator_object_create;
341 ce.get_iterator = _breakiterator_get_iterator;
342 BreakIterator_ce_ptr = zend_register_internal_class(&ce TSRMLS_CC);
343
344 memcpy(&BreakIterator_handlers, zend_get_std_object_handlers(),
345 sizeof BreakIterator_handlers);
346 BreakIterator_handlers.compare_objects = BreakIterator_compare_objects;
347 BreakIterator_handlers.clone_obj = BreakIterator_clone_obj;
348 BreakIterator_handlers.get_debug_info = BreakIterator_get_debug_info;
349
350 zend_class_implements(BreakIterator_ce_ptr TSRMLS_CC, 1,
351 zend_ce_traversable);
352
353 zend_declare_class_constant_long(BreakIterator_ce_ptr,
354 "DONE", sizeof("DONE") - 1, BreakIterator::DONE TSRMLS_CC );
355
356 /* Declare constants that are defined in the C header */
357 #define BREAKITER_DECL_LONG_CONST(name) \
358 zend_declare_class_constant_long(BreakIterator_ce_ptr, #name, \
359 sizeof(#name) - 1, UBRK_ ## name TSRMLS_CC)
360
361 BREAKITER_DECL_LONG_CONST(WORD_NONE);
362 BREAKITER_DECL_LONG_CONST(WORD_NONE_LIMIT);
363 BREAKITER_DECL_LONG_CONST(WORD_NUMBER);
364 BREAKITER_DECL_LONG_CONST(WORD_NUMBER_LIMIT);
365 BREAKITER_DECL_LONG_CONST(WORD_LETTER);
366 BREAKITER_DECL_LONG_CONST(WORD_LETTER_LIMIT);
367 BREAKITER_DECL_LONG_CONST(WORD_KANA);
368 BREAKITER_DECL_LONG_CONST(WORD_KANA_LIMIT);
369 BREAKITER_DECL_LONG_CONST(WORD_IDEO);
370 BREAKITER_DECL_LONG_CONST(WORD_IDEO_LIMIT);
371
372 BREAKITER_DECL_LONG_CONST(LINE_SOFT);
373 BREAKITER_DECL_LONG_CONST(LINE_SOFT_LIMIT);
374 BREAKITER_DECL_LONG_CONST(LINE_HARD);
375 BREAKITER_DECL_LONG_CONST(LINE_HARD_LIMIT);
376
377 BREAKITER_DECL_LONG_CONST(SENTENCE_TERM);
378 BREAKITER_DECL_LONG_CONST(SENTENCE_TERM_LIMIT);
379 BREAKITER_DECL_LONG_CONST(SENTENCE_SEP);
380 BREAKITER_DECL_LONG_CONST(SENTENCE_SEP_LIMIT);
381
382 #undef BREAKITER_DECL_LONG_CONST
383
384
385 /* Create and register 'RuleBasedBreakIterator' class. */
386 INIT_CLASS_ENTRY(ce, "IntlRuleBasedBreakIterator",
387 RuleBasedBreakIterator_class_functions);
388 RuleBasedBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
389 BreakIterator_ce_ptr, NULL TSRMLS_CC);
390
391 /* Create and register 'CodePointBreakIterator' class. */
392 INIT_CLASS_ENTRY(ce, "IntlCodePointBreakIterator",
393 CodePointBreakIterator_class_functions);
394 CodePointBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
395 BreakIterator_ce_ptr, NULL TSRMLS_CC);
396 }
397 /* }}} */
398