1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Gustavo Lopes <cataphract@php.net> |
12 +----------------------------------------------------------------------+
13 */
14
15 #include <unicode/rbbi.h>
16
17 extern "C" {
18 #define USE_BREAKITERATOR_POINTER 1
19 #include "breakiterator_class.h"
20 #include <zend_exceptions.h>
21 #include <limits.h>
22 }
23
24 #include "../intl_convertcpp.h"
25 #include "../intl_common.h"
26
27 using icu::RuleBasedBreakIterator;
28 using icu::Locale;
29
fetch_rbbi(BreakIterator_object * bio)30 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
31 return (RuleBasedBreakIterator*)bio->biter;
32 }
33
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS,zend_error_handling * error_handling,bool * error_handling_replaced)34 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_handling, bool *error_handling_replaced)
35 {
36 char *rules;
37 size_t rules_len;
38 bool compiled = 0;
39 UErrorCode status = U_ZERO_ERROR;
40 BREAKITER_METHOD_INIT_VARS;
41 object = ZEND_THIS;
42
43 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
44 &rules, &rules_len, &compiled) == FAILURE) {
45 RETURN_THROWS();
46 }
47
48 BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK;
49 if (bio->biter) {
50 zend_throw_error(NULL, "IntlRuleBasedBreakIterator object is already constructed");
51 RETURN_THROWS();
52 }
53
54 zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, error_handling);
55 *error_handling_replaced = 1;
56
57 // instantiation of ICU object
58 RuleBasedBreakIterator *rbbi;
59
60 if (!compiled) {
61 UnicodeString rulesStr;
62 UParseError parseError = UParseError();
63 if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
64 == FAILURE) {
65 zend_throw_exception(IntlException_ce_ptr,
66 "IntlRuleBasedBreakIterator::__construct(): "
67 "rules were not a valid UTF-8 string", 0);
68 RETURN_THROWS();
69 }
70
71 rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
72 intl_error_set_code(NULL, status);
73 if (U_FAILURE(status)) {
74 smart_str parse_error_str;
75 parse_error_str = intl_parse_error_to_string(&parseError);
76 zend_throw_exception_ex(IntlException_ce_ptr, 0,
77 "IntlRuleBasedBreakIterator::__construct(): "
78 "unable to create RuleBasedBreakIterator from rules (%s)",
79 parse_error_str.s ? ZSTR_VAL(parse_error_str.s) : "");
80 smart_str_free(&parse_error_str);
81 delete rbbi;
82 RETURN_THROWS();
83 }
84 } else { // compiled
85 rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
86 if (U_FAILURE(status)) {
87 zend_throw_exception(IntlException_ce_ptr,
88 "IntlRuleBasedBreakIterator::__construct(): "
89 "unable to create instance from compiled rules", 0);
90 delete rbbi;
91 RETURN_THROWS();
92 }
93 }
94
95 breakiterator_object_create(return_value, rbbi, 0);
96 }
97
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)98 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
99 {
100 zend_error_handling error_handling;
101 bool error_handling_replaced = 0;
102
103 return_value = ZEND_THIS;
104 _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU, &error_handling, &error_handling_replaced);
105 if (error_handling_replaced) {
106 zend_restore_error_handling(&error_handling);
107 }
108 }
109
PHP_METHOD(IntlRuleBasedBreakIterator,getRules)110 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRules)
111 {
112 BREAKITER_METHOD_INIT_VARS;
113 object = ZEND_THIS;
114
115 if (zend_parse_parameters_none() == FAILURE) {
116 RETURN_THROWS();
117 }
118
119 BREAKITER_METHOD_FETCH_OBJECT;
120
121 zend_string *u8str;
122 const UnicodeString rules = fetch_rbbi(bio)->getRules();
123
124 u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
125 if (!u8str)
126 {
127 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
128 "rbbi_hash_code: Error converting result to UTF-8 string",
129 0);
130 RETURN_FALSE;
131 }
132 RETVAL_STR(u8str);
133 }
134
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatus)135 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatus)
136 {
137 BREAKITER_METHOD_INIT_VARS;
138 object = ZEND_THIS;
139
140 if (zend_parse_parameters_none() == FAILURE) {
141 RETURN_THROWS();
142 }
143
144 BREAKITER_METHOD_FETCH_OBJECT;
145
146 RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
147 }
148
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatusVec)149 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatusVec)
150 {
151 BREAKITER_METHOD_INIT_VARS;
152 object = ZEND_THIS;
153
154 if (zend_parse_parameters_none() == FAILURE) {
155 RETURN_THROWS();
156 }
157
158 BREAKITER_METHOD_FETCH_OBJECT;
159
160 int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
161 BREAKITER_ERROR_CODE(bio));
162
163 ZEND_ASSERT(BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR);
164 BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
165
166 int32_t *rules = new int32_t[num_rules];
167 num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
168 BREAKITER_ERROR_CODE(bio));
169 if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
170 delete[] rules;
171 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
172 "rbbi_get_rule_status_vec: failed obtaining the status values",
173 0);
174 RETURN_FALSE;
175 }
176
177 array_init_size(return_value, num_rules);
178 for (int32_t i = 0; i < num_rules; i++) {
179 add_next_index_long(return_value, rules[i]);
180 }
181 delete[] rules;
182 }
183
PHP_METHOD(IntlRuleBasedBreakIterator,getBinaryRules)184 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getBinaryRules)
185 {
186 BREAKITER_METHOD_INIT_VARS;
187 object = ZEND_THIS;
188
189 if (zend_parse_parameters_none() == FAILURE) {
190 RETURN_THROWS();
191 }
192
193 BREAKITER_METHOD_FETCH_OBJECT;
194
195 uint32_t rules_len;
196 const uint8_t *rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
197
198 if (rules_len > INT_MAX - 1) {
199 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
200 "rbbi_get_binary_rules: the rules are too large",
201 0);
202 RETURN_FALSE;
203 }
204
205 zend_string *ret_rules = zend_string_alloc(rules_len, 0);
206 memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
207 ZSTR_VAL(ret_rules)[rules_len] = '\0';
208
209 RETURN_STR(ret_rules);
210 }
211