1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Gustavo Lopes <cataphract@php.net> |
12 +----------------------------------------------------------------------+
13 */
14
15 #include <unicode/rbbi.h>
16 #include <memory>
17
18 extern "C" {
19 #define USE_BREAKITERATOR_POINTER 1
20 #include "breakiterator_class.h"
21 #include <zend_exceptions.h>
22 #include <limits.h>
23 }
24
25 #include "../intl_convertcpp.h"
26 #include "../intl_common.h"
27
28 using icu::RuleBasedBreakIterator;
29 using icu::Locale;
30
fetch_rbbi(BreakIterator_object * bio)31 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
32 return (RuleBasedBreakIterator*)bio->biter;
33 }
34
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS,zend_error_handling * error_handling,bool * error_handling_replaced)35 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_handling, bool *error_handling_replaced)
36 {
37 char *rules;
38 size_t rules_len;
39 bool compiled = 0;
40 UErrorCode status = U_ZERO_ERROR;
41 BREAKITER_METHOD_INIT_VARS;
42 object = ZEND_THIS;
43
44 if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
45 &rules, &rules_len, &compiled) == FAILURE) {
46 RETURN_THROWS();
47 }
48
49 BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK;
50 if (bio->biter) {
51 zend_throw_error(NULL, "IntlRuleBasedBreakIterator object is already constructed");
52 RETURN_THROWS();
53 }
54
55 zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, error_handling);
56 *error_handling_replaced = 1;
57
58 // instantiation of ICU object
59 RuleBasedBreakIterator *rbbi;
60
61 if (!compiled) {
62 UnicodeString rulesStr;
63 UParseError parseError = UParseError();
64 if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
65 == FAILURE) {
66 zend_throw_exception(IntlException_ce_ptr,
67 "IntlRuleBasedBreakIterator::__construct(): "
68 "rules were not a valid UTF-8 string", 0);
69 RETURN_THROWS();
70 }
71
72 rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
73 intl_error_set_code(NULL, status);
74 if (U_FAILURE(status)) {
75 smart_str parse_error_str;
76 parse_error_str = intl_parse_error_to_string(&parseError);
77 zend_throw_exception_ex(IntlException_ce_ptr, 0,
78 "IntlRuleBasedBreakIterator::__construct(): "
79 "unable to create RuleBasedBreakIterator from rules (%s)",
80 parse_error_str.s ? ZSTR_VAL(parse_error_str.s) : "");
81 smart_str_free(&parse_error_str);
82 delete rbbi;
83 RETURN_THROWS();
84 }
85 } else { // compiled
86 rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
87 if (U_FAILURE(status)) {
88 zend_throw_exception(IntlException_ce_ptr,
89 "IntlRuleBasedBreakIterator::__construct(): "
90 "unable to create instance from compiled rules", 0);
91 delete rbbi;
92 RETURN_THROWS();
93 }
94 }
95
96 breakiterator_object_create(return_value, rbbi, 0);
97 }
98
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)99 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
100 {
101 zend_error_handling error_handling;
102 bool error_handling_replaced = 0;
103
104 return_value = ZEND_THIS;
105 _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU, &error_handling, &error_handling_replaced);
106 if (error_handling_replaced) {
107 zend_restore_error_handling(&error_handling);
108 }
109 }
110
PHP_METHOD(IntlRuleBasedBreakIterator,getRules)111 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRules)
112 {
113 BREAKITER_METHOD_INIT_VARS;
114 object = ZEND_THIS;
115
116 if (zend_parse_parameters_none() == FAILURE) {
117 RETURN_THROWS();
118 }
119
120 BREAKITER_METHOD_FETCH_OBJECT;
121
122 zend_string *u8str;
123 const UnicodeString rules = fetch_rbbi(bio)->getRules();
124
125 u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
126 if (!u8str)
127 {
128 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
129 "rbbi_hash_code: Error converting result to UTF-8 string",
130 0);
131 RETURN_FALSE;
132 }
133 RETVAL_STR(u8str);
134 }
135
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatus)136 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatus)
137 {
138 BREAKITER_METHOD_INIT_VARS;
139 object = ZEND_THIS;
140
141 if (zend_parse_parameters_none() == FAILURE) {
142 RETURN_THROWS();
143 }
144
145 BREAKITER_METHOD_FETCH_OBJECT;
146
147 RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
148 }
149
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatusVec)150 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatusVec)
151 {
152 BREAKITER_METHOD_INIT_VARS;
153 object = ZEND_THIS;
154
155 if (zend_parse_parameters_none() == FAILURE) {
156 RETURN_THROWS();
157 }
158
159 BREAKITER_METHOD_FETCH_OBJECT;
160
161 int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
162 BREAKITER_ERROR_CODE(bio));
163
164 ZEND_ASSERT(BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR);
165 BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
166
167 std::unique_ptr<int32_t[]> rules = std::unique_ptr<int32_t[]>(new int32_t[num_rules]);
168 num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules.get(), num_rules,
169 BREAKITER_ERROR_CODE(bio));
170 if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
171 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
172 "rbbi_get_rule_status_vec: failed obtaining the status values",
173 0);
174 RETURN_FALSE;
175 }
176
177 array_init_size(return_value, num_rules);
178 for (int32_t i = 0; i < num_rules; i++) {
179 add_next_index_long(return_value, rules[i]);
180 }
181 }
182
PHP_METHOD(IntlRuleBasedBreakIterator,getBinaryRules)183 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getBinaryRules)
184 {
185 BREAKITER_METHOD_INIT_VARS;
186 object = ZEND_THIS;
187
188 if (zend_parse_parameters_none() == FAILURE) {
189 RETURN_THROWS();
190 }
191
192 BREAKITER_METHOD_FETCH_OBJECT;
193
194 uint32_t rules_len;
195 const uint8_t *rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
196
197 if (rules_len > INT_MAX - 1) {
198 intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
199 "rbbi_get_binary_rules: the rules are too large",
200 0);
201 RETURN_FALSE;
202 }
203
204 zend_string *ret_rules = zend_string_alloc(rules_len, 0);
205 memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
206 ZSTR_VAL(ret_rules)[rules_len] = '\0';
207
208 RETURN_STR(ret_rules);
209 }
210