1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13  */
14 
15 #include <unicode/rbbi.h>
16 #include <memory>
17 
18 extern "C" {
19 #define USE_BREAKITERATOR_POINTER 1
20 #include "breakiterator_class.h"
21 #include <zend_exceptions.h>
22 #include <limits.h>
23 }
24 
25 #include "../intl_convertcpp.h"
26 #include "../intl_common.h"
27 
28 using icu::RuleBasedBreakIterator;
29 using icu::Locale;
30 
fetch_rbbi(BreakIterator_object * bio)31 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
32 	return (RuleBasedBreakIterator*)bio->biter;
33 }
34 
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS,zend_error_handling * error_handling,bool * error_handling_replaced)35 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_handling, bool *error_handling_replaced)
36 {
37 	char		*rules;
38 	size_t		rules_len;
39 	bool	compiled	= false;
40 	UErrorCode	status		= U_ZERO_ERROR;
41 	BREAKITER_METHOD_INIT_VARS;
42 	object = ZEND_THIS;
43 
44 	ZEND_PARSE_PARAMETERS_START(1, 2)
45 		Z_PARAM_STRING(rules, rules_len)
46 		Z_PARAM_OPTIONAL
47 		Z_PARAM_BOOL(compiled)
48 	ZEND_PARSE_PARAMETERS_END();
49 
50 	BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK;
51 	if (bio->biter) {
52 		zend_throw_error(NULL, "IntlRuleBasedBreakIterator object is already constructed");
53 		RETURN_THROWS();
54 	}
55 
56 	zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, error_handling);
57 	*error_handling_replaced = 1;
58 
59 	// instantiation of ICU object
60 	RuleBasedBreakIterator *rbbi;
61 
62 	if (!compiled) {
63 		UnicodeString	rulesStr;
64 		UParseError		parseError = UParseError();
65 		if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
66 				== FAILURE) {
67 			zend_throw_exception(IntlException_ce_ptr,
68 				"IntlRuleBasedBreakIterator::__construct(): "
69 				"rules were not a valid UTF-8 string", 0);
70 			RETURN_THROWS();
71 		}
72 
73 		rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
74 		intl_error_set_code(NULL, status);
75 		if (U_FAILURE(status)) {
76 			smart_str parse_error_str;
77 			parse_error_str = intl_parse_error_to_string(&parseError);
78 			zend_throw_exception_ex(IntlException_ce_ptr, 0,
79 				"IntlRuleBasedBreakIterator::__construct(): "
80 				"unable to create RuleBasedBreakIterator from rules (%s)",
81 				parse_error_str.s ? ZSTR_VAL(parse_error_str.s) : "");
82 			smart_str_free(&parse_error_str);
83 			delete rbbi;
84 			RETURN_THROWS();
85 		}
86 	} else { // compiled
87 		rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
88 		if (U_FAILURE(status)) {
89 			zend_throw_exception(IntlException_ce_ptr,
90 				"IntlRuleBasedBreakIterator::__construct(): "
91 				"unable to create instance from compiled rules", 0);
92 			delete rbbi;
93 			RETURN_THROWS();
94 		}
95 	}
96 
97 	breakiterator_object_create(return_value, rbbi, 0);
98 }
99 
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)100 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
101 {
102 	zend_error_handling error_handling;
103 	bool error_handling_replaced = 0;
104 
105 	return_value = ZEND_THIS;
106 	_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU, &error_handling, &error_handling_replaced);
107 	if (error_handling_replaced) {
108 		zend_restore_error_handling(&error_handling);
109 	}
110 }
111 
PHP_METHOD(IntlRuleBasedBreakIterator,getRules)112 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRules)
113 {
114 	BREAKITER_METHOD_INIT_VARS;
115 	object = ZEND_THIS;
116 
117 	ZEND_PARSE_PARAMETERS_NONE();
118 
119 	BREAKITER_METHOD_FETCH_OBJECT;
120 
121 	zend_string *u8str;
122 	const UnicodeString rules = fetch_rbbi(bio)->getRules();
123 
124 	u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
125 	if (!u8str)
126 	{
127 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
128 				"rbbi_hash_code: Error converting result to UTF-8 string",
129 				0);
130 		RETURN_FALSE;
131 	}
132 	RETVAL_STR(u8str);
133 }
134 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatus)135 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatus)
136 {
137 	BREAKITER_METHOD_INIT_VARS;
138 	object = ZEND_THIS;
139 
140 	ZEND_PARSE_PARAMETERS_NONE();
141 
142 	BREAKITER_METHOD_FETCH_OBJECT;
143 
144 	RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
145 }
146 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatusVec)147 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatusVec)
148 {
149 	BREAKITER_METHOD_INIT_VARS;
150 	object = ZEND_THIS;
151 
152 	ZEND_PARSE_PARAMETERS_NONE();
153 
154 	BREAKITER_METHOD_FETCH_OBJECT;
155 
156 	int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
157 			BREAKITER_ERROR_CODE(bio));
158 
159 	ZEND_ASSERT(BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR);
160 	BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
161 
162 	std::unique_ptr<int32_t[]> rules = std::unique_ptr<int32_t[]>(new int32_t[num_rules]);
163 	num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules.get(), num_rules,
164 			BREAKITER_ERROR_CODE(bio));
165 	if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
166 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
167 				"rbbi_get_rule_status_vec: failed obtaining the status values",
168 				0);
169 		RETURN_FALSE;
170 	}
171 
172 	array_init_size(return_value, num_rules);
173 	for (int32_t i = 0; i < num_rules; i++) {
174 		add_next_index_long(return_value, rules[i]);
175 	}
176 }
177 
PHP_METHOD(IntlRuleBasedBreakIterator,getBinaryRules)178 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getBinaryRules)
179 {
180 	BREAKITER_METHOD_INIT_VARS;
181 	object = ZEND_THIS;
182 
183 	ZEND_PARSE_PARAMETERS_NONE();
184 
185 	BREAKITER_METHOD_FETCH_OBJECT;
186 
187 	uint32_t		rules_len;
188 	const uint8_t	*rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
189 
190 	if (rules_len > INT_MAX - 1) {
191 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
192 				"rbbi_get_binary_rules: the rules are too large",
193 				0);
194 		RETURN_FALSE;
195 	}
196 
197 	zend_string *ret_rules = zend_string_alloc(rules_len, 0);
198 	memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
199 	ZSTR_VAL(ret_rules)[rules_len] = '\0';
200 
201 	RETURN_STR(ret_rules);
202 }
203