1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Gustavo Lopes <cataphract@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include <unicode/rbbi.h>
18
19 extern "C" {
20 #define USE_BREAKITERATOR_POINTER 1
21 #include "breakiterator_class.h"
22 #include <zend_exceptions.h>
23 #include <limits.h>
24 }
25
26 #include "../intl_convertcpp.h"
27
fetch_rbbi(BreakIterator_object * bio)28 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
29 return (RuleBasedBreakIterator*)bio->biter;
30 }
31
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)32 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
33 {
34 zval *object = getThis();
35 char *rules;
36 int rules_len;
37 zend_bool compiled = 0;
38 UErrorCode status = U_ZERO_ERROR;
39 intl_error_reset(NULL TSRMLS_CC);
40
41 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b",
42 &rules, &rules_len, &compiled) == FAILURE) {
43 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
44 "rbbi_create_instance: bad arguments", 0 TSRMLS_CC);
45 RETURN_NULL();
46 }
47
48 // instantiation of ICU object
49 RuleBasedBreakIterator *rbbi;
50
51 if (!compiled) {
52 UnicodeString rulesStr;
53 UParseError parseError = UParseError();
54 if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
55 == FAILURE) {
56 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
57 "rbbi_create_instance: rules were not a valid UTF-8 string",
58 0 TSRMLS_CC);
59 RETURN_NULL();
60 }
61
62 rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
63 intl_error_set_code(NULL, status TSRMLS_CC);
64 if (U_FAILURE(status)) {
65 char *msg;
66 smart_str parse_error_str;
67 parse_error_str = intl_parse_error_to_string(&parseError);
68 spprintf(&msg, 0, "rbbi_create_instance: unable to create "
69 "RuleBasedBreakIterator from rules (%s)", parse_error_str.c);
70 smart_str_free(&parse_error_str);
71 intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC);
72 efree(msg);
73 delete rbbi;
74 RETURN_NULL();
75 }
76 } else { // compiled
77 #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
78 rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
79 if (U_FAILURE(status)) {
80 intl_error_set(NULL, status, "rbbi_create_instance: unable to "
81 "create instance from compiled rules", 0 TSRMLS_CC);
82 delete rbbi;
83 RETURN_NULL();
84 }
85 #else
86 intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
87 "compiled rules require ICU >= 4.8", 0 TSRMLS_CC);
88 RETURN_NULL();
89 #endif
90 }
91
92 breakiterator_object_create(return_value, rbbi TSRMLS_CC);
93 }
94
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)95 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
96 {
97 zval orig_this = *getThis();
98
99 return_value = getThis();
100 //changes this to IS_NULL (without first destroying) if there's an error
101 _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
102
103 if (Z_TYPE_P(return_value) == IS_NULL) {
104 zend_object_store_ctor_failed(&orig_this TSRMLS_CC);
105 zval_dtor(&orig_this);
106 }
107 }
108
PHP_FUNCTION(rbbi_get_rules)109 U_CFUNC PHP_FUNCTION(rbbi_get_rules)
110 {
111 BREAKITER_METHOD_INIT_VARS;
112 object = getThis();
113
114 if (zend_parse_parameters_none() == FAILURE) {
115 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
116 "rbbi_get_rules: bad arguments", 0 TSRMLS_CC);
117 RETURN_FALSE;
118 }
119
120 BREAKITER_METHOD_FETCH_OBJECT;
121
122 const UnicodeString rules = fetch_rbbi(bio)->getRules();
123
124 Z_TYPE_P(return_value) = IS_STRING;
125 if (intl_charFromString(rules, &Z_STRVAL_P(return_value),
126 &Z_STRLEN_P(return_value), BREAKITER_ERROR_CODE_P(bio)) == FAILURE)
127 {
128 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
129 "rbbi_hash_code: Error converting result to UTF-8 string",
130 0 TSRMLS_CC);
131 RETURN_FALSE;
132 }
133 }
134
PHP_FUNCTION(rbbi_get_rule_status)135 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status)
136 {
137 BREAKITER_METHOD_INIT_VARS;
138 object = getThis();
139
140 if (zend_parse_parameters_none() == FAILURE) {
141 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
142 "rbbi_get_rule_status: bad arguments", 0 TSRMLS_CC);
143 RETURN_FALSE;
144 }
145
146 BREAKITER_METHOD_FETCH_OBJECT;
147
148 RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
149 }
150
PHP_FUNCTION(rbbi_get_rule_status_vec)151 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec)
152 {
153 BREAKITER_METHOD_INIT_VARS;
154 object = getThis();
155
156 if (zend_parse_parameters_none() == FAILURE) {
157 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
158 "rbbi_get_rule_status_vec: bad arguments", 0 TSRMLS_CC);
159 RETURN_FALSE;
160 }
161
162 BREAKITER_METHOD_FETCH_OBJECT;
163
164 int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
165 BREAKITER_ERROR_CODE(bio));
166 if (BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR) {
167 BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
168 } else {
169 // should not happen
170 INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed "
171 " determining the number of status values");
172 }
173 int32_t *rules = new int32_t[num_rules];
174 num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
175 BREAKITER_ERROR_CODE(bio));
176 if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
177 delete[] rules;
178 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
179 "rbbi_get_rule_status_vec: failed obtaining the status values",
180 0 TSRMLS_CC);
181 RETURN_FALSE;
182 }
183
184 array_init_size(return_value, num_rules);
185 for (int32_t i = 0; i < num_rules; i++) {
186 add_next_index_long(return_value, rules[i]);
187 }
188 delete[] rules;
189 }
190
191 #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
PHP_FUNCTION(rbbi_get_binary_rules)192 U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules)
193 {
194 BREAKITER_METHOD_INIT_VARS;
195 object = getThis();
196
197 if (zend_parse_parameters_none() == FAILURE) {
198 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
199 "rbbi_get_binary_rules: bad arguments", 0 TSRMLS_CC);
200 RETURN_FALSE;
201 }
202
203 BREAKITER_METHOD_FETCH_OBJECT;
204
205 uint32_t rules_len;
206 const uint8_t *rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
207
208 if (rules_len > INT_MAX - 1) {
209 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
210 "rbbi_get_binary_rules: the rules are too large",
211 0 TSRMLS_CC);
212 RETURN_FALSE;
213 }
214
215 char *ret_rules = static_cast<char*>(emalloc(rules_len + 1));
216 memcpy(ret_rules, rules, rules_len);
217 ret_rules[rules_len] = '\0';
218
219 RETURN_STRINGL(ret_rules, rules_len, 0);
220 }
221 #endif
222