1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13  */
14 
15 #include <unicode/rbbi.h>
16 
17 extern "C" {
18 #define USE_BREAKITERATOR_POINTER 1
19 #include "breakiterator_class.h"
20 #include <zend_exceptions.h>
21 #include <limits.h>
22 }
23 
24 #include "../intl_convertcpp.h"
25 #include "../intl_common.h"
26 
27 using icu::RuleBasedBreakIterator;
28 using icu::Locale;
29 
fetch_rbbi(BreakIterator_object * bio)30 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
31 	return (RuleBasedBreakIterator*)bio->biter;
32 }
33 
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS,zend_error_handling * error_handling,bool * error_handling_replaced)34 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_handling, bool *error_handling_replaced)
35 {
36 	char		*rules;
37 	size_t		rules_len;
38 	bool	compiled	= 0;
39 	UErrorCode	status		= U_ZERO_ERROR;
40 	BREAKITER_METHOD_INIT_VARS;
41 	object = ZEND_THIS;
42 
43 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
44 			&rules, &rules_len, &compiled) == FAILURE) {
45 		RETURN_THROWS();
46 	}
47 
48 	BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK;
49 	if (bio->biter) {
50 		zend_throw_error(NULL, "IntlRuleBasedBreakIterator object is already constructed");
51 		RETURN_THROWS();
52 	}
53 
54 	zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, error_handling);
55 	*error_handling_replaced = 1;
56 
57 	// instantiation of ICU object
58 	RuleBasedBreakIterator *rbbi;
59 
60 	if (!compiled) {
61 		UnicodeString	rulesStr;
62 		UParseError		parseError = UParseError();
63 		if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
64 				== FAILURE) {
65 			zend_throw_exception(IntlException_ce_ptr,
66 				"IntlRuleBasedBreakIterator::__construct(): "
67 				"rules were not a valid UTF-8 string", 0);
68 			RETURN_THROWS();
69 		}
70 
71 		rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
72 		intl_error_set_code(NULL, status);
73 		if (U_FAILURE(status)) {
74 			smart_str parse_error_str;
75 			parse_error_str = intl_parse_error_to_string(&parseError);
76 			zend_throw_exception_ex(IntlException_ce_ptr, 0,
77 				"IntlRuleBasedBreakIterator::__construct(): "
78 				"unable to create RuleBasedBreakIterator from rules (%s)",
79 				parse_error_str.s ? ZSTR_VAL(parse_error_str.s) : "");
80 			smart_str_free(&parse_error_str);
81 			delete rbbi;
82 			RETURN_THROWS();
83 		}
84 	} else { // compiled
85 		rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
86 		if (U_FAILURE(status)) {
87 			zend_throw_exception(IntlException_ce_ptr,
88 				"IntlRuleBasedBreakIterator::__construct(): "
89 				"unable to create instance from compiled rules", 0);
90 			delete rbbi;
91 			RETURN_THROWS();
92 		}
93 	}
94 
95 	breakiterator_object_create(return_value, rbbi, 0);
96 }
97 
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)98 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
99 {
100 	zend_error_handling error_handling;
101 	bool error_handling_replaced = 0;
102 
103 	return_value = ZEND_THIS;
104 	_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU, &error_handling, &error_handling_replaced);
105 	if (error_handling_replaced) {
106 		zend_restore_error_handling(&error_handling);
107 	}
108 }
109 
PHP_METHOD(IntlRuleBasedBreakIterator,getRules)110 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRules)
111 {
112 	BREAKITER_METHOD_INIT_VARS;
113 	object = ZEND_THIS;
114 
115 	if (zend_parse_parameters_none() == FAILURE) {
116 		RETURN_THROWS();
117 	}
118 
119 	BREAKITER_METHOD_FETCH_OBJECT;
120 
121 	zend_string *u8str;
122 	const UnicodeString rules = fetch_rbbi(bio)->getRules();
123 
124 	u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
125 	if (!u8str)
126 	{
127 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
128 				"rbbi_hash_code: Error converting result to UTF-8 string",
129 				0);
130 		RETURN_FALSE;
131 	}
132 	RETVAL_STR(u8str);
133 }
134 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatus)135 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatus)
136 {
137 	BREAKITER_METHOD_INIT_VARS;
138 	object = ZEND_THIS;
139 
140 	if (zend_parse_parameters_none() == FAILURE) {
141 		RETURN_THROWS();
142 	}
143 
144 	BREAKITER_METHOD_FETCH_OBJECT;
145 
146 	RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
147 }
148 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatusVec)149 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatusVec)
150 {
151 	BREAKITER_METHOD_INIT_VARS;
152 	object = ZEND_THIS;
153 
154 	if (zend_parse_parameters_none() == FAILURE) {
155 		RETURN_THROWS();
156 	}
157 
158 	BREAKITER_METHOD_FETCH_OBJECT;
159 
160 	int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
161 			BREAKITER_ERROR_CODE(bio));
162 
163 	ZEND_ASSERT(BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR);
164 	BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
165 
166 	int32_t *rules = new int32_t[num_rules];
167 	num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
168 			BREAKITER_ERROR_CODE(bio));
169 	if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
170 		delete[] rules;
171 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
172 				"rbbi_get_rule_status_vec: failed obtaining the status values",
173 				0);
174 		RETURN_FALSE;
175 	}
176 
177 	array_init_size(return_value, num_rules);
178 	for (int32_t i = 0; i < num_rules; i++) {
179 		add_next_index_long(return_value, rules[i]);
180 	}
181 	delete[] rules;
182 }
183 
PHP_METHOD(IntlRuleBasedBreakIterator,getBinaryRules)184 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getBinaryRules)
185 {
186 	BREAKITER_METHOD_INIT_VARS;
187 	object = ZEND_THIS;
188 
189 	if (zend_parse_parameters_none() == FAILURE) {
190 		RETURN_THROWS();
191 	}
192 
193 	BREAKITER_METHOD_FETCH_OBJECT;
194 
195 	uint32_t		rules_len;
196 	const uint8_t	*rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
197 
198 	if (rules_len > INT_MAX - 1) {
199 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
200 				"rbbi_get_binary_rules: the rules are too large",
201 				0);
202 		RETURN_FALSE;
203 	}
204 
205 	zend_string *ret_rules = zend_string_alloc(rules_len, 0);
206 	memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
207 	ZSTR_VAL(ret_rules)[rules_len] = '\0';
208 
209 	RETURN_STR(ret_rules);
210 }
211