1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include <unicode/rbbi.h>
18 
19 extern "C" {
20 #define USE_BREAKITERATOR_POINTER 1
21 #include "breakiterator_class.h"
22 #include <zend_exceptions.h>
23 #include <limits.h>
24 }
25 
26 #include "../intl_convertcpp.h"
27 #include "../intl_common.h"
28 
29 using icu::RuleBasedBreakIterator;
30 using icu::Locale;
31 
fetch_rbbi(BreakIterator_object * bio)32 static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
33 	return (RuleBasedBreakIterator*)bio->biter;
34 }
35 
_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)36 static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
37 {
38 	char		*rules;
39 	size_t		rules_len;
40 	zend_bool	compiled	= 0;
41 	UErrorCode	status		= U_ZERO_ERROR;
42 	intl_error_reset(NULL);
43 
44 	if (zend_parse_parameters_throw(ZEND_NUM_ARGS(), "s|b",
45 			&rules, &rules_len, &compiled) == FAILURE) {
46 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
47 			"rbbi_create_instance: bad arguments", 0);
48 		return;
49 	}
50 
51 	// instantiation of ICU object
52 	RuleBasedBreakIterator *rbbi;
53 
54 	if (!compiled) {
55 		UnicodeString	rulesStr;
56 		UParseError		parseError = UParseError();
57 		if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
58 				== FAILURE) {
59 			intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
60 				"rbbi_create_instance: rules were not a valid UTF-8 string",
61 				0);
62 			RETURN_NULL();
63 		}
64 
65 		rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
66 		intl_error_set_code(NULL, status);
67 		if (U_FAILURE(status)) {
68 			char *msg;
69 			smart_str parse_error_str;
70 			parse_error_str = intl_parse_error_to_string(&parseError);
71 			spprintf(&msg, 0, "rbbi_create_instance: unable to create "
72 				"RuleBasedBreakIterator from rules (%s)", parse_error_str.s? ZSTR_VAL(parse_error_str.s) : "");
73 			smart_str_free(&parse_error_str);
74 			intl_error_set_custom_msg(NULL, msg, 1);
75 			efree(msg);
76 			delete rbbi;
77 			return;
78 		}
79 	} else { // compiled
80 		rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
81 		if (U_FAILURE(status)) {
82 			intl_error_set(NULL, status, "rbbi_create_instance: unable to "
83 				"create instance from compiled rules", 0);
84 			delete rbbi;
85 			return;
86 		}
87 	}
88 
89 	breakiterator_object_create(return_value, rbbi, 0);
90 }
91 
PHP_METHOD(IntlRuleBasedBreakIterator,__construct)92 U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
93 {
94 	zend_error_handling error_handling;
95 
96 	zend_replace_error_handling(EH_THROW, IntlException_ce_ptr, &error_handling);
97 	return_value = ZEND_THIS;
98 	_php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
99 	zend_restore_error_handling(&error_handling);
100 }
101 
PHP_FUNCTION(rbbi_get_rules)102 U_CFUNC PHP_FUNCTION(rbbi_get_rules)
103 {
104 	BREAKITER_METHOD_INIT_VARS;
105 	object = ZEND_THIS;
106 
107 	if (zend_parse_parameters_none() == FAILURE) {
108 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
109 			"rbbi_get_rules: bad arguments", 0);
110 		RETURN_FALSE;
111 	}
112 
113 	BREAKITER_METHOD_FETCH_OBJECT;
114 
115 	zend_string *u8str;
116 	const UnicodeString rules = fetch_rbbi(bio)->getRules();
117 
118 	u8str = intl_charFromString(rules, BREAKITER_ERROR_CODE_P(bio));
119 	if (!u8str)
120 	{
121 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
122 				"rbbi_hash_code: Error converting result to UTF-8 string",
123 				0);
124 		RETURN_FALSE;
125 	}
126 	RETVAL_STR(u8str);
127 }
128 
PHP_FUNCTION(rbbi_get_rule_status)129 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status)
130 {
131 	BREAKITER_METHOD_INIT_VARS;
132 	object = ZEND_THIS;
133 
134 	if (zend_parse_parameters_none() == FAILURE) {
135 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
136 			"rbbi_get_rule_status: bad arguments", 0);
137 		RETURN_FALSE;
138 	}
139 
140 	BREAKITER_METHOD_FETCH_OBJECT;
141 
142 	RETURN_LONG(fetch_rbbi(bio)->getRuleStatus());
143 }
144 
PHP_FUNCTION(rbbi_get_rule_status_vec)145 U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec)
146 {
147 	BREAKITER_METHOD_INIT_VARS;
148 	object = ZEND_THIS;
149 
150 	if (zend_parse_parameters_none() == FAILURE) {
151 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
152 			"rbbi_get_rule_status_vec: bad arguments", 0);
153 		RETURN_FALSE;
154 	}
155 
156 	BREAKITER_METHOD_FETCH_OBJECT;
157 
158 	int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0,
159 			BREAKITER_ERROR_CODE(bio));
160 	if (BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR) {
161 		BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
162 	} else {
163 		// should not happen
164 		INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed "
165 				" determining the number of status values");
166 	}
167 	int32_t *rules = new int32_t[num_rules];
168 	num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules,
169 			BREAKITER_ERROR_CODE(bio));
170 	if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
171 		delete[] rules;
172 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
173 				"rbbi_get_rule_status_vec: failed obtaining the status values",
174 				0);
175 		RETURN_FALSE;
176 	}
177 
178 	array_init_size(return_value, num_rules);
179 	for (int32_t i = 0; i < num_rules; i++) {
180 		add_next_index_long(return_value, rules[i]);
181 	}
182 	delete[] rules;
183 }
184 
PHP_FUNCTION(rbbi_get_binary_rules)185 U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules)
186 {
187 	BREAKITER_METHOD_INIT_VARS;
188 	object = ZEND_THIS;
189 
190 	if (zend_parse_parameters_none() == FAILURE) {
191 		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
192 			"rbbi_get_binary_rules: bad arguments", 0);
193 		RETURN_FALSE;
194 	}
195 
196 	BREAKITER_METHOD_FETCH_OBJECT;
197 
198 	uint32_t		rules_len;
199 	const uint8_t	*rules = fetch_rbbi(bio)->getBinaryRules(rules_len);
200 
201 	if (rules_len > INT_MAX - 1) {
202 		intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
203 				"rbbi_get_binary_rules: the rules are too large",
204 				0);
205 		RETURN_FALSE;
206 	}
207 
208 	zend_string *ret_rules = zend_string_alloc(rules_len, 0);
209 	memcpy(ZSTR_VAL(ret_rules), rules, rules_len);
210 	ZSTR_VAL(ret_rules)[rules_len] = '\0';
211 
212 	RETURN_STR(ret_rules);
213 }
214