1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include <unicode/rbbi.h>
18 
19 extern "C" {
20 #define USE_BREAKITERATOR_POINTER 1
21 #include "breakiterator_class.h"
22 #include <zend_exceptions.h>
23 #include <limits.h>
24 }
25 
26 #include "../intl_convertcpp.h"
27 #include "../intl_common.h"
28 
fetch_rbbi(BreakIterator_object * bio)29 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
30 	return (RuleBasedBreakIterator*)bio->biter;
31 }
32 
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)33 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
34 {
35 	char		*rules;
36 	size_t		rules_len;
37 	zend_bool	compiled	= 0;
38 	UErrorCode	status		= U_ZERO_ERROR;
39 	intl_error_reset(NULL);
40 
41 	if (zend_parse_parameters_throw(ZEND_NUM_ARGS(), "s|b",
42 			&rules, &rules_len, &compiled) == FAILURE) {
43 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
44 			"rbbi_create_instance: bad arguments", 0);
45 		return;
46 	}
47 
48 	// instantiation of ICU object
49 	RuleBasedBreakIterator *rbbi;
50 
51 	if (!compiled) {
52 		UnicodeString	rulesStr;
53 		UParseError		parseError = UParseError();
54 		if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
55 				== FAILURE) {
56 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
57 				"rbbi_create_instance: rules were not a valid UTF-8 string",
58 				0);
59 			RETURN_NULL();
60 		}
61 
62 		rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
63 		intl_error_set_code(NULL, status);
64 		if (U_FAILURE(status)) {
65 			char *msg;
66 			smart_str parse_error_str;
67 			parse_error_str = intl_parse_error_to_string(&parseError);
68 			spprintf(&msg, 0, "rbbi_create_instance: unable to create "
69 				"RuleBasedBreakIterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "");
70 			smart_str_free(&parse_error_str);
71 			intl_error_set_custom_msg(NULL, msg, 1);
72 			efree(msg);
73 			delete rbbi;
74 			return;
75 		}
76 	} else { // compiled
77 #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
78 		rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
79 		if (U_FAILURE(status)) {
80 			intl_error_set(NULL, status, "rbbi_create_instance: unable to "
81 				"create instance from compiled rules", 0);
82 			delete rbbi;
83 			return;
84 		}
85 #else
86 		intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
87 			"compiled rules require ICU >= 4.8", 0);
88 		return;
89 #endif
90 	}
91 
92 	breakiterator_object_create(return_value, rbbi, 0);
93 }
94 
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)95 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
96 {
97 	zend_error_handling error_handling;
98 
99 	zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, &error_handling);
100 	return_value = getThis();
101 	_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
102 	zend_restore_error_handling(&error_handling);
103 }
104 
PHP_FUNCTION(rbbi_get_rules)105 U_CFUNC PHP_FUNCTION(rbbi_get_rules)
106 {
107 	BREAKITER_METHOD_INIT_VARS;
108 	object = getThis();
109 
110 	if (zend_parse_parameters_none() == FAILURE) {
111 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
112 			"rbbi_get_rules: bad arguments", 0);
113 		RETURN_FALSE;
114 	}
115 
116 	BREAKITER_METHOD_FETCH_OBJECT;
117 
118 	zend_string *u8str;
119 	const UnicodeString rules = fetch_rbbi(bio)->getRules();
120 
121 	u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
122 	if (!u8str)
123 	{
124 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
125 				"rbbi_hash_code: Error converting result to UTF-8 string",
126 				0);
127 		RETURN_FALSE;
128 	}
129 	RETVAL_STR(u8str);
130 }
131 
PHP_FUNCTION(rbbi_get_rule_status)132 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status)
133 {
134 	BREAKITER_METHOD_INIT_VARS;
135 	object = getThis();
136 
137 	if (zend_parse_parameters_none() == FAILURE) {
138 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
139 			"rbbi_get_rule_status: bad arguments", 0);
140 		RETURN_FALSE;
141 	}
142 
143 	BREAKITER_METHOD_FETCH_OBJECT;
144 
145 	RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
146 }
147 
PHP_FUNCTION(rbbi_get_rule_status_vec)148 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec)
149 {
150 	BREAKITER_METHOD_INIT_VARS;
151 	object = getThis();
152 
153 	if (zend_parse_parameters_none() == FAILURE) {
154 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
155 			"rbbi_get_rule_status_vec: bad arguments", 0);
156 		RETURN_FALSE;
157 	}
158 
159 	BREAKITER_METHOD_FETCH_OBJECT;
160 
161 	int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
162 			BREAKITER_ERROR_CODE(bio));
163 	if (BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR) {
164 		BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
165 	} else {
166 		// should not happen
167 		INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed "
168 				" determining the number of status values");
169 	}
170 	int32_t *rules = new int32_t[num_rules];
171 	num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
172 			BREAKITER_ERROR_CODE(bio));
173 	if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
174 		delete[] rules;
175 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
176 				"rbbi_get_rule_status_vec: failed obtaining the status values",
177 				0);
178 		RETURN_FALSE;
179 	}
180 
181 	array_init_size(return_value, num_rules);
182 	for (int32_t i = 0; i < num_rules; i++) {
183 		add_next_index_long(return_value, rules[i]);
184 	}
185 	delete[] rules;
186 }
187 
188 #if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
PHP_FUNCTION(rbbi_get_binary_rules)189 U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules)
190 {
191 	BREAKITER_METHOD_INIT_VARS;
192 	object = getThis();
193 
194 	if (zend_parse_parameters_none() == FAILURE) {
195 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
196 			"rbbi_get_binary_rules: bad arguments", 0);
197 		RETURN_FALSE;
198 	}
199 
200 	BREAKITER_METHOD_FETCH_OBJECT;
201 
202 	uint32_t		rules_len;
203 	const uint8_t	*rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
204 
205 	if (rules_len > INT_MAX - 1) {
206 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
207 				"rbbi_get_binary_rules: the rules are too large",
208 				0);
209 		RETURN_FALSE;
210 	}
211 
212 	zend_string *ret_rules = zend_string_alloc(rules_len, 0);
213 	memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
214 	ZSTR_VAL(ret_rules)[rules_len] = '\0';
215 
216 	RETURN_STR(ret_rules);
217 }
218 #endif
219