1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
14 | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
15 +----------------------------------------------------------------------+
16 */
17
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <php.h>
23
24 #include "intl_common.h"
25 #include "intl_convert.h"
26
27 /* {{{ intl_convert_utf8_to_utf16
28 * Convert given string from UTF-8 to UTF-16 to *target buffer.
29 *
30 * It *target is NULL then we allocate a large enough buffer,
31 * store the converted string into it, and make target point to it.
32 *
33 * Otherwise, if *target is non-NULL, we assume that it points to a
34 * dynamically allocated buffer of *target_len bytes length.
35 * In this case the buffer will be used to store the converted string to,
36 * and may be resized (made larger) if needed.
37 *
38 * Note that ICU uses int32_t as string length and PHP uses size_t. While
39 * it is not likely in practical situations to have strings longer than
40 * INT32_MAX, these are different types and need to be handled carefully.
41 *
42 * @param target Where to place the result.
43 * @param target_len Result length.
44 * @param source String to convert.
45 * @param source_len Length of the source string.
46 * @param status Conversion status.
47 *
48 * @return void This function does not return anything.
49 */
intl_convert_utf8_to_utf16(UChar ** target,int32_t * target_len,const char * src,size_t src_len,UErrorCode * status)50 void intl_convert_utf8_to_utf16(
51 UChar** target, int32_t* target_len,
52 const char* src, size_t src_len,
53 UErrorCode* status )
54 {
55 UChar* dst_buf = NULL;
56 int32_t dst_len = 0;
57
58 /* If *target is NULL determine required destination buffer size (pre-flighting).
59 * Otherwise, attempt to convert source string; if *target buffer is not large enough
60 * it will be resized appropriately.
61 */
62 *status = U_ZERO_ERROR;
63
64 if(src_len > INT32_MAX) {
65 /* we can not fit this string */
66 *status = U_BUFFER_OVERFLOW_ERROR;
67 return;
68 }
69
70 u_strFromUTF8( *target, *target_len, &dst_len, src, (int32_t)src_len, status );
71
72 if( *status == U_ZERO_ERROR )
73 {
74 /* String is converted successfully */
75 (*target)[dst_len] = 0;
76 *target_len = dst_len;
77 return;
78 }
79
80 /* Bail out if an unexpected error occurred.
81 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
82 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
83 */
84 if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
85 return;
86
87 /* Allocate memory for the destination buffer (it will be zero-terminated). */
88 dst_buf = eumalloc( dst_len + 1 );
89
90 /* Convert source string from UTF-8 to UTF-16. */
91 *status = U_ZERO_ERROR;
92 u_strFromUTF8( dst_buf, dst_len+1, NULL, src, src_len, status );
93 if( U_FAILURE( *status ) )
94 {
95 efree( dst_buf );
96 return;
97 }
98
99 dst_buf[dst_len] = 0;
100
101 if( *target )
102 efree( *target );
103
104 *target = dst_buf;
105 *target_len = dst_len;
106 }
107 /* }}} */
108
109 /* {{{ intl_convert_utf16_to_utf8
110 * Convert given string from UTF-16 to UTF-8.
111 *
112 * @param source String to convert.
113 * @param source_len Length of the source string.
114 * @param status Conversion status.
115 *
116 * @return zend_string
117 */
intl_convert_utf16_to_utf8(const UChar * src,int32_t src_len,UErrorCode * status)118 zend_string* intl_convert_utf16_to_utf8(
119 const UChar* src, int32_t src_len,
120 UErrorCode* status )
121 {
122 zend_string* dst;
123 int32_t dst_len;
124
125 /* Determine required destination buffer size (pre-flighting). */
126 *status = U_ZERO_ERROR;
127 u_strToUTF8( NULL, 0, &dst_len, src, src_len, status );
128
129 /* Bail out if an unexpected error occurred.
130 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
131 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
132 */
133 if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
134 return NULL;
135
136 /* Allocate memory for the destination buffer (it will be zero-terminated). */
137 dst = zend_string_alloc(dst_len, 0);
138
139 /* Convert source string from UTF-8 to UTF-16. */
140 *status = U_ZERO_ERROR;
141 u_strToUTF8( ZSTR_VAL(dst), dst_len, NULL, src, src_len, status );
142 if( U_FAILURE( *status ) )
143 {
144 zend_string_free(dst);
145 return NULL;
146 }
147
148 /* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
149 *status = U_ZERO_ERROR;
150
151 ZSTR_VAL(dst)[dst_len] = 0;
152 return dst;
153 }
154 /* }}} */
155
156 /*
157 * Local variables:
158 * tab-width: 4
159 * c-basic-offset: 4
160 * End:
161 * vim600: noet sw=4 ts=4 fdm=marker
162 * vim<600: noet sw=4 ts=4
163 */
164