1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Ed Batutis <ed@batutis.com> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include "php_intl.h"
22 #include "unicode/unorm.h"
23 #include "normalizer.h"
24 #include "normalizer_class.h"
25 #include "normalizer_normalize.h"
26 #include "intl_convert.h"
27
28 /* {{{ proto string Normalizer::normalize( string $input [, string $form = FORM_C] )
29 * Normalize a string. }}} */
30 /* {{{ proto string normalizer_normalize( string $input [, string $form = FORM_C] )
31 * Normalize a string.
32 */
PHP_FUNCTION(normalizer_normalize)33 PHP_FUNCTION( normalizer_normalize )
34 {
35 char* input = NULL;
36 /* form is optional, defaults to FORM_C */
37 zend_long form = NORMALIZER_DEFAULT;
38 size_t input_len = 0;
39
40 UChar* uinput = NULL;
41 int32_t uinput_len = 0;
42 int expansion_factor = 1;
43 UErrorCode status = U_ZERO_ERROR;
44
45 UChar* uret_buf = NULL;
46 int32_t uret_len = 0;
47
48 zend_string* u8str;
49
50 int32_t size_needed;
51
52 intl_error_reset( NULL );
53
54 /* Parse parameters. */
55 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "s|l",
56 &input, &input_len, &form ) == FAILURE )
57 {
58 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
59 "normalizer_normalize: unable to parse input params", 0 );
60
61 RETURN_FALSE;
62 }
63
64 expansion_factor = 1;
65
66 switch(form) {
67 case NORMALIZER_NONE:
68 break;
69 case NORMALIZER_FORM_D:
70 expansion_factor = 3;
71 break;
72 case NORMALIZER_FORM_KD:
73 expansion_factor = 3;
74 break;
75 case NORMALIZER_FORM_C:
76 case NORMALIZER_FORM_KC:
77 break;
78 default:
79 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
80 "normalizer_normalize: illegal normalization form", 0 );
81 RETURN_FALSE;
82 }
83
84 /*
85 * Normalize string (converting it to UTF-16 first).
86 */
87
88 /* First convert the string to UTF-16. */
89 intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
90
91 if( U_FAILURE( status ) )
92 {
93 /* Set global error code. */
94 intl_error_set_code( NULL, status );
95
96 /* Set error messages. */
97 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 );
98 if (uinput) {
99 efree( uinput );
100 }
101 RETURN_FALSE;
102 }
103
104
105 /* Allocate memory for the destination buffer for normalization */
106 uret_len = uinput_len * expansion_factor;
107 uret_buf = eumalloc( uret_len + 1 );
108
109 /* normalize */
110 size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
111
112 /* Bail out if an unexpected error occurred.
113 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
114 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
115 */
116 if( U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && status != U_STRING_NOT_TERMINATED_WARNING ) {
117 efree( uret_buf );
118 efree( uinput );
119 RETURN_NULL();
120 }
121
122 if ( size_needed > uret_len ) {
123 /* realloc does not seem to work properly - memory is corrupted
124 * uret_buf = eurealloc(uret_buf, size_needed + 1);
125 */
126 efree( uret_buf );
127 uret_buf = eumalloc( size_needed + 1 );
128 uret_len = size_needed;
129
130 status = U_ZERO_ERROR;
131
132 /* try normalize again */
133 size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0 /* options */, uret_buf, uret_len, &status);
134
135 /* Bail out if an unexpected error occurred. */
136 if( U_FAILURE(status) ) {
137 /* Set error messages. */
138 intl_error_set_custom_msg( NULL,"Error normalizing string", 0 );
139 efree( uret_buf );
140 efree( uinput );
141 RETURN_FALSE;
142 }
143 }
144
145 efree( uinput );
146
147 /* the buffer we actually used */
148 uret_len = size_needed;
149
150 /* Convert normalized string from UTF-16 to UTF-8. */
151 u8str = intl_convert_utf16_to_utf8(uret_buf, uret_len, &status );
152 efree( uret_buf );
153 if( !u8str )
154 {
155 intl_error_set( NULL, status,
156 "normalizer_normalize: error converting normalized text UTF-8", 0 );
157 RETURN_FALSE;
158 }
159
160 /* Return it. */
161 RETVAL_NEW_STR( u8str );
162 }
163 /* }}} */
164
165 /* {{{ proto bool Normalizer::isNormalized( string $input [, string $form = FORM_C] )
166 * Test if a string is in a given normalization form. }}} */
167 /* {{{ proto bool normalizer_is_normalize( string $input [, string $form = FORM_C] )
168 * Test if a string is in a given normalization form.
169 */
PHP_FUNCTION(normalizer_is_normalized)170 PHP_FUNCTION( normalizer_is_normalized )
171 {
172 char* input = NULL;
173 /* form is optional, defaults to FORM_C */
174 zend_long form = NORMALIZER_DEFAULT;
175 size_t input_len = 0;
176
177 UChar* uinput = NULL;
178 int uinput_len = 0;
179 UErrorCode status = U_ZERO_ERROR;
180
181 UBool uret = FALSE;
182
183 intl_error_reset( NULL );
184
185 /* Parse parameters. */
186 if( zend_parse_method_parameters( ZEND_NUM_ARGS(), getThis(), "s|l",
187 &input, &input_len, &form) == FAILURE )
188 {
189 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
190 "normalizer_is_normalized: unable to parse input params", 0 );
191
192 RETURN_FALSE;
193 }
194
195 switch(form) {
196 /* case NORMALIZER_NONE: not allowed - doesn't make sense */
197
198 case NORMALIZER_FORM_D:
199 case NORMALIZER_FORM_KD:
200 case NORMALIZER_FORM_C:
201 case NORMALIZER_FORM_KC:
202 break;
203 default:
204 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
205 "normalizer_normalize: illegal normalization form", 0 );
206 RETURN_FALSE;
207 }
208
209
210 /*
211 * Test normalization of string (converting it to UTF-16 first).
212 */
213
214 /* First convert the string to UTF-16. */
215 intl_convert_utf8_to_utf16(&uinput, &uinput_len, input, input_len, &status );
216
217 if( U_FAILURE( status ) )
218 {
219 /* Set global error code. */
220 intl_error_set_code( NULL, status );
221
222 /* Set error messages. */
223 intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 );
224 if (uinput) {
225 efree( uinput );
226 }
227 RETURN_FALSE;
228 }
229
230
231 /* test string */
232 uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0 /* options */, &status);
233
234 efree( uinput );
235
236 /* Bail out if an unexpected error occurred. */
237 if( U_FAILURE(status) ) {
238 /* Set error messages. */
239 intl_error_set_custom_msg( NULL,"Error testing if string is the given normalization form.", 0 );
240 RETURN_FALSE;
241 }
242
243 if ( uret )
244 RETURN_TRUE;
245
246 RETURN_FALSE;
247 }
248 /* }}} */
249
250 /*
251 * Local variables:
252 * tab-width: 4
253 * c-basic-offset: 4
254 * End:
255 * vim600: noet sw=4 ts=4 fdm=marker
256 * vim<600: noet sw=4 ts=4
257 */
258