1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Gustavo Lopes <cataphract@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include <unicode/rbbi.h>
18
19 extern "C" {
20 #define USE_BREAKITERATOR_POINTER 1
21 #include "breakiterator_class.h"
22 #include <zend_exceptions.h>
23 #include <limits.h>
24 }
25
26 #include "../intl_convertcpp.h"
27 #include "../intl_common.h"
28
fetch_rbbi(BreakIterator_object * bio)29 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
30 return (RuleBasedBreakIterator*)bio->biter;
31 }
32
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)33 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
34 {
35 char *rules;
36 size_t rules_len;
37 zend_bool compiled = 0;
38 UErrorCode status = U_ZERO_ERROR;
39 intl_error_reset(NULL);
40
41 if (zend_parse_parameters_throw(ZEND_NUM_ARGS(), "s|b",
42 &rules, &rules_len, &compiled) == FAILURE) {
43 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
44 "rbbi_create_instance: bad arguments", 0);
45 return;
46 }
47
48 // instantiation of ICU object
49 RuleBasedBreakIterator *rbbi;
50
51 if (!compiled) {
52 UnicodeString rulesStr;
53 UParseError parseError = UParseError();
54 if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
55 == FAILURE) {
56 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
57 "rbbi_create_instance: rules were not a valid UTF-8 string",
58 0);
59 RETURN_NULL();
60 }
61
62 rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
63 intl_error_set_code(NULL, status);
64 if (U_FAILURE(status)) {
65 char *msg;
66 smart_str parse_error_str;
67 parse_error_str = intl_parse_error_to_string(&parseError);
68 spprintf(&msg, 0, "rbbi_create_instance: unable to create "
69 "RuleBasedBreakIterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "");
70 smart_str_free(&parse_error_str);
71 intl_error_set_custom_msg(NULL, msg, 1);
72 efree(msg);
73 delete rbbi;
74 return;
75 }
76 } else { // compiled
77 #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
78 rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
79 if (U_FAILURE(status)) {
80 intl_error_set(NULL, status, "rbbi_create_instance: unable to "
81 "create instance from compiled rules", 0);
82 return;
83 }
84 #else
85 intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
86 "compiled rules require ICU >= 4.8", 0);
87 return;
88 #endif
89 }
90
91 breakiterator_object_create(return_value, rbbi, 0);
92 }
93
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)94 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
95 {
96 zend_error_handling error_handling;
97
98 zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, &error_handling);
99 return_value = getThis();
100 _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
101 zend_restore_error_handling(&error_handling);
102 }
103
PHP_FUNCTION(rbbi_get_rules)104 U_CFUNC PHP_FUNCTION(rbbi_get_rules)
105 {
106 BREAKITER_METHOD_INIT_VARS;
107 object = getThis();
108
109 if (zend_parse_parameters_none() == FAILURE) {
110 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
111 "rbbi_get_rules: bad arguments", 0);
112 RETURN_FALSE;
113 }
114
115 BREAKITER_METHOD_FETCH_OBJECT;
116
117 zend_string *u8str;
118 const UnicodeString rules = fetch_rbbi(bio)->getRules();
119
120 u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
121 if (!u8str)
122 {
123 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
124 "rbbi_hash_code: Error converting result to UTF-8 string",
125 0);
126 RETURN_FALSE;
127 }
128 RETVAL_STR(u8str);
129 }
130
PHP_FUNCTION(rbbi_get_rule_status)131 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status)
132 {
133 BREAKITER_METHOD_INIT_VARS;
134 object = getThis();
135
136 if (zend_parse_parameters_none() == FAILURE) {
137 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
138 "rbbi_get_rule_status: bad arguments", 0);
139 RETURN_FALSE;
140 }
141
142 BREAKITER_METHOD_FETCH_OBJECT;
143
144 RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
145 }
146
PHP_FUNCTION(rbbi_get_rule_status_vec)147 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec)
148 {
149 BREAKITER_METHOD_INIT_VARS;
150 object = getThis();
151
152 if (zend_parse_parameters_none() == FAILURE) {
153 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
154 "rbbi_get_rule_status_vec: bad arguments", 0);
155 RETURN_FALSE;
156 }
157
158 BREAKITER_METHOD_FETCH_OBJECT;
159
160 int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
161 BREAKITER_ERROR_CODE(bio));
162 if (BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR) {
163 BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
164 } else {
165 // should not happen
166 INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed "
167 " determining the number of status values");
168 }
169 int32_t *rules = new int32_t[num_rules];
170 num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
171 BREAKITER_ERROR_CODE(bio));
172 if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
173 delete[] rules;
174 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
175 "rbbi_get_rule_status_vec: failed obtaining the status values",
176 0);
177 RETURN_FALSE;
178 }
179
180 array_init_size(return_value, num_rules);
181 for (int32_t i = 0; i < num_rules; i++) {
182 add_next_index_long(return_value, rules[i]);
183 }
184 delete[] rules;
185 }
186
187 #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
PHP_FUNCTION(rbbi_get_binary_rules)188 U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules)
189 {
190 BREAKITER_METHOD_INIT_VARS;
191 object = getThis();
192
193 if (zend_parse_parameters_none() == FAILURE) {
194 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
195 "rbbi_get_binary_rules: bad arguments", 0);
196 RETURN_FALSE;
197 }
198
199 BREAKITER_METHOD_FETCH_OBJECT;
200
201 uint32_t rules_len;
202 const uint8_t *rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
203
204 if (rules_len > INT_MAX - 1) {
205 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
206 "rbbi_get_binary_rules: the rules are too large",
207 0);
208 RETURN_FALSE;
209 }
210
211 zend_string *ret_rules = zend_string_alloc(rules_len, 0);
212 memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
213 ZSTR_VAL(ret_rules)[rules_len] = '\0';
214
215 RETURN_STR(ret_rules);
216 }
217 #endif
218