1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13  */
14 
15 #include <unicode/rbbi.h>
16 #include <memory>
17 
18 extern "C" {
19 #define USE_BREAKITERATOR_POINTER 1
20 #include "breakiterator_class.h"
21 #include <zend_exceptions.h>
22 #include <limits.h>
23 }
24 
25 #include "../intl_convertcpp.h"
26 #include "../intl_common.h"
27 
28 using icu::RuleBasedBreakIterator;
29 using icu::Locale;
30 
fetch_rbbi(BreakIterator_object * bio)31 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
32 	return (RuleBasedBreakIterator*)bio->biter;
33 }
34 
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS,zend_error_handling * error_handling,bool * error_handling_replaced)35 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_handling, bool *error_handling_replaced)
36 {
37 	char		*rules;
38 	size_t		rules_len;
39 	bool	compiled	= 0;
40 	UErrorCode	status		= U_ZERO_ERROR;
41 	BREAKITER_METHOD_INIT_VARS;
42 	object = ZEND_THIS;
43 
44 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
45 			&rules, &rules_len, &compiled) == FAILURE) {
46 		RETURN_THROWS();
47 	}
48 
49 	BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK;
50 	if (bio->biter) {
51 		zend_throw_error(NULL, "IntlRuleBasedBreakIterator object is already constructed");
52 		RETURN_THROWS();
53 	}
54 
55 	zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, error_handling);
56 	*error_handling_replaced = 1;
57 
58 	// instantiation of ICU object
59 	RuleBasedBreakIterator *rbbi;
60 
61 	if (!compiled) {
62 		UnicodeString	rulesStr;
63 		UParseError		parseError = UParseError();
64 		if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
65 				== FAILURE) {
66 			zend_throw_exception(IntlException_ce_ptr,
67 				"IntlRuleBasedBreakIterator::__construct(): "
68 				"rules were not a valid UTF-8 string", 0);
69 			RETURN_THROWS();
70 		}
71 
72 		rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
73 		intl_error_set_code(NULL, status);
74 		if (U_FAILURE(status)) {
75 			smart_str parse_error_str;
76 			parse_error_str = intl_parse_error_to_string(&parseError);
77 			zend_throw_exception_ex(IntlException_ce_ptr, 0,
78 				"IntlRuleBasedBreakIterator::__construct(): "
79 				"unable to create RuleBasedBreakIterator from rules (%s)",
80 				parse_error_str.s ? ZSTR_VAL(parse_error_str.s) : "");
81 			smart_str_free(&parse_error_str);
82 			delete rbbi;
83 			RETURN_THROWS();
84 		}
85 	} else { // compiled
86 		rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
87 		if (U_FAILURE(status)) {
88 			zend_throw_exception(IntlException_ce_ptr,
89 				"IntlRuleBasedBreakIterator::__construct(): "
90 				"unable to create instance from compiled rules", 0);
91 			delete rbbi;
92 			RETURN_THROWS();
93 		}
94 	}
95 
96 	breakiterator_object_create(return_value, rbbi, 0);
97 }
98 
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)99 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
100 {
101 	zend_error_handling error_handling;
102 	bool error_handling_replaced = 0;
103 
104 	return_value = ZEND_THIS;
105 	_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU, &error_handling, &error_handling_replaced);
106 	if (error_handling_replaced) {
107 		zend_restore_error_handling(&error_handling);
108 	}
109 }
110 
PHP_METHOD(IntlRuleBasedBreakIterator,getRules)111 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRules)
112 {
113 	BREAKITER_METHOD_INIT_VARS;
114 	object = ZEND_THIS;
115 
116 	if (zend_parse_parameters_none() == FAILURE) {
117 		RETURN_THROWS();
118 	}
119 
120 	BREAKITER_METHOD_FETCH_OBJECT;
121 
122 	zend_string *u8str;
123 	const UnicodeString rules = fetch_rbbi(bio)->getRules();
124 
125 	u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
126 	if (!u8str)
127 	{
128 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
129 				"rbbi_hash_code: Error converting result to UTF-8 string",
130 				0);
131 		RETURN_FALSE;
132 	}
133 	RETVAL_STR(u8str);
134 }
135 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatus)136 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatus)
137 {
138 	BREAKITER_METHOD_INIT_VARS;
139 	object = ZEND_THIS;
140 
141 	if (zend_parse_parameters_none() == FAILURE) {
142 		RETURN_THROWS();
143 	}
144 
145 	BREAKITER_METHOD_FETCH_OBJECT;
146 
147 	RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
148 }
149 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatusVec)150 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatusVec)
151 {
152 	BREAKITER_METHOD_INIT_VARS;
153 	object = ZEND_THIS;
154 
155 	if (zend_parse_parameters_none() == FAILURE) {
156 		RETURN_THROWS();
157 	}
158 
159 	BREAKITER_METHOD_FETCH_OBJECT;
160 
161 	int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
162 			BREAKITER_ERROR_CODE(bio));
163 
164 	ZEND_ASSERT(BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR);
165 	BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
166 
167 	std::unique_ptr<int32_t[]> rules = std::unique_ptr<int32_t[]>(new int32_t[num_rules]);
168 	num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules.get(), num_rules,
169 			BREAKITER_ERROR_CODE(bio));
170 	if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
171 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
172 				"rbbi_get_rule_status_vec: failed obtaining the status values",
173 				0);
174 		RETURN_FALSE;
175 	}
176 
177 	array_init_size(return_value, num_rules);
178 	for (int32_t i = 0; i < num_rules; i++) {
179 		add_next_index_long(return_value, rules[i]);
180 	}
181 }
182 
PHP_METHOD(IntlRuleBasedBreakIterator,getBinaryRules)183 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getBinaryRules)
184 {
185 	BREAKITER_METHOD_INIT_VARS;
186 	object = ZEND_THIS;
187 
188 	if (zend_parse_parameters_none() == FAILURE) {
189 		RETURN_THROWS();
190 	}
191 
192 	BREAKITER_METHOD_FETCH_OBJECT;
193 
194 	uint32_t		rules_len;
195 	const uint8_t	*rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
196 
197 	if (rules_len > INT_MAX - 1) {
198 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
199 				"rbbi_get_binary_rules: the rules are too large",
200 				0);
201 		RETURN_FALSE;
202 	}
203 
204 	zend_string *ret_rules = zend_string_alloc(rules_len, 0);
205 	memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
206 	ZSTR_VAL(ret_rules)[rules_len] = '\0';
207 
208 	RETURN_STR(ret_rules);
209 }
210