1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | http://www.php.net/license/3_01.txt                                  |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Gustavo Lopes <cataphract@php.net>                          |
12    +----------------------------------------------------------------------+
13  */
14 
15 #include <unicode/rbbi.h>
16 
17 extern "C" {
18 #define USE_BREAKITERATOR_POINTER 1
19 #include "breakiterator_class.h"
20 #include <zend_exceptions.h>
21 #include <limits.h>
22 }
23 
24 #include "../intl_convertcpp.h"
25 #include "../intl_common.h"
26 
27 using icu::RuleBasedBreakIterator;
28 using icu::Locale;
29 
fetch_rbbi(BreakIterator_object * bio)30 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
31 	return (RuleBasedBreakIterator*)bio->biter;
32 }
33 
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)34 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
35 {
36 	char		*rules;
37 	size_t		rules_len;
38 	zend_bool	compiled	= 0;
39 	UErrorCode	status		= U_ZERO_ERROR;
40 	BREAKITER_METHOD_INIT_VARS;
41 	object = ZEND_THIS;
42 
43 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|b",
44 			&rules, &rules_len, &compiled) == FAILURE) {
45 		RETURN_THROWS();
46 	}
47 
48 	BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK;
49 	if (bio->biter) {
50 		zend_throw_error(NULL, "IntlRuleBasedBreakIterator object is already constructed");
51 		RETURN_THROWS();
52 	}
53 
54 	// instantiation of ICU object
55 	RuleBasedBreakIterator *rbbi;
56 
57 	if (!compiled) {
58 		UnicodeString	rulesStr;
59 		UParseError		parseError = UParseError();
60 		if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
61 				== FAILURE) {
62 			zend_throw_exception(IntlException_ce_ptr,
63 				"IntlRuleBasedBreakIterator::__construct(): "
64 				"rules were not a valid UTF-8 string", 0);
65 			RETURN_THROWS();
66 		}
67 
68 		rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
69 		intl_error_set_code(NULL, status);
70 		if (U_FAILURE(status)) {
71 			smart_str parse_error_str;
72 			parse_error_str = intl_parse_error_to_string(&parseError);
73 			zend_throw_exception_ex(IntlException_ce_ptr, 0,
74 				"IntlRuleBasedBreakIterator::__construct(): "
75 				"unable to create RuleBasedBreakIterator from rules (%s)",
76 				parse_error_str.s ? ZSTR_VAL(parse_error_str.s) : "");
77 			smart_str_free(&parse_error_str);
78 			delete rbbi;
79 			RETURN_THROWS();
80 		}
81 	} else { // compiled
82 		rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
83 		if (U_FAILURE(status)) {
84 			zend_throw_exception(IntlException_ce_ptr,
85 				"IntlRuleBasedBreakIterator::__construct(): "
86 				"unable to create instance from compiled rules", 0);
87 			delete rbbi;
88 			RETURN_THROWS();
89 		}
90 	}
91 
92 	breakiterator_object_create(return_value, rbbi, 0);
93 }
94 
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)95 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
96 {
97 	zend_error_handling error_handling;
98 
99 	zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, &error_handling);
100 	return_value = ZEND_THIS;
101 	_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
102 	zend_restore_error_handling(&error_handling);
103 }
104 
PHP_METHOD(IntlRuleBasedBreakIterator,getRules)105 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRules)
106 {
107 	BREAKITER_METHOD_INIT_VARS;
108 	object = ZEND_THIS;
109 
110 	if (zend_parse_parameters_none() == FAILURE) {
111 		RETURN_THROWS();
112 	}
113 
114 	BREAKITER_METHOD_FETCH_OBJECT;
115 
116 	zend_string *u8str;
117 	const UnicodeString rules = fetch_rbbi(bio)->getRules();
118 
119 	u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
120 	if (!u8str)
121 	{
122 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
123 				"rbbi_hash_code: Error converting result to UTF-8 string",
124 				0);
125 		RETURN_FALSE;
126 	}
127 	RETVAL_STR(u8str);
128 }
129 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatus)130 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatus)
131 {
132 	BREAKITER_METHOD_INIT_VARS;
133 	object = ZEND_THIS;
134 
135 	if (zend_parse_parameters_none() == FAILURE) {
136 		RETURN_THROWS();
137 	}
138 
139 	BREAKITER_METHOD_FETCH_OBJECT;
140 
141 	RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
142 }
143 
PHP_METHOD(IntlRuleBasedBreakIterator,getRuleStatusVec)144 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getRuleStatusVec)
145 {
146 	BREAKITER_METHOD_INIT_VARS;
147 	object = ZEND_THIS;
148 
149 	if (zend_parse_parameters_none() == FAILURE) {
150 		RETURN_THROWS();
151 	}
152 
153 	BREAKITER_METHOD_FETCH_OBJECT;
154 
155 	int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
156 			BREAKITER_ERROR_CODE(bio));
157 
158 	ZEND_ASSERT(BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR);
159 	BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
160 
161 	int32_t *rules = new int32_t[num_rules];
162 	num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
163 			BREAKITER_ERROR_CODE(bio));
164 	if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
165 		delete[] rules;
166 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
167 				"rbbi_get_rule_status_vec: failed obtaining the status values",
168 				0);
169 		RETURN_FALSE;
170 	}
171 
172 	array_init_size(return_value, num_rules);
173 	for (int32_t i = 0; i < num_rules; i++) {
174 		add_next_index_long(return_value, rules[i]);
175 	}
176 	delete[] rules;
177 }
178 
PHP_METHOD(IntlRuleBasedBreakIterator,getBinaryRules)179 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, getBinaryRules)
180 {
181 	BREAKITER_METHOD_INIT_VARS;
182 	object = ZEND_THIS;
183 
184 	if (zend_parse_parameters_none() == FAILURE) {
185 		RETURN_THROWS();
186 	}
187 
188 	BREAKITER_METHOD_FETCH_OBJECT;
189 
190 	uint32_t		rules_len;
191 	const uint8_t	*rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
192 
193 	if (rules_len > INT_MAX - 1) {
194 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
195 				"rbbi_get_binary_rules: the rules are too large",
196 				0);
197 		RETURN_FALSE;
198 	}
199 
200 	zend_string *ret_rules = zend_string_alloc(rules_len, 0);
201 	memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
202 	ZSTR_VAL(ret_rules)[rules_len] = '\0';
203 
204 	RETURN_STR(ret_rules);
205 }
206