1 /*
2 +----------------------------------------------------------------------+
3 | This source file is subject to version 3.01 of the PHP license, |
4 | that is bundled with this package in the file LICENSE, and is |
5 | available through the world-wide-web at the following url: |
6 | https://www.php.net/license/3_01.txt |
7 | If you did not receive a copy of the PHP license and are unable to |
8 | obtain it through the world-wide-web, please send a note to |
9 | license@php.net so we can mail you a copy immediately. |
10 +----------------------------------------------------------------------+
11 | Authors: Vadim Savchuk <vsavchuk@productengine.com> |
12 | Dmitry Lakhtyuk <dlakhtyuk@productengine.com> |
13 +----------------------------------------------------------------------+
14 */
15
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19
20 #include <php.h>
21
22 #include "intl_common.h"
23 #include "intl_convert.h"
24
25 /* {{{ intl_convert_utf8_to_utf16
26 * Convert given string from UTF-8 to UTF-16 to *target buffer.
27 *
28 * It *target is NULL then we allocate a large enough buffer,
29 * store the converted string into it, and make target point to it.
30 *
31 * Otherwise, if *target is non-NULL, we assume that it points to a
32 * dynamically allocated buffer of *target_len bytes length.
33 * In this case the buffer will be used to store the converted string to,
34 * and may be resized (made larger) if needed.
35 *
36 * Note that ICU uses int32_t as string length and PHP uses size_t. While
37 * it is not likely in practical situations to have strings longer than
38 * INT32_MAX, these are different types and need to be handled carefully.
39 *
40 * @param target Where to place the result.
41 * @param target_len Result length.
42 * @param source String to convert.
43 * @param source_len Length of the source string.
44 * @param status Conversion status.
45 *
46 * @return void This function does not return anything.
47 */
intl_convert_utf8_to_utf16(UChar ** target,int32_t * target_len,const char * src,size_t src_len,UErrorCode * status)48 void intl_convert_utf8_to_utf16(
49 UChar** target, int32_t* target_len,
50 const char* src, size_t src_len,
51 UErrorCode* status )
52 {
53 UChar* dst_buf = NULL;
54 int32_t dst_len = 0;
55
56 /* If *target is NULL determine required destination buffer size (pre-flighting).
57 * Otherwise, attempt to convert source string; if *target buffer is not large enough
58 * it will be resized appropriately.
59 */
60 *status = U_ZERO_ERROR;
61
62 if(src_len > INT32_MAX) {
63 /* we cannot fit this string */
64 *status = U_BUFFER_OVERFLOW_ERROR;
65 return;
66 }
67
68 u_strFromUTF8( *target, *target_len, &dst_len, src, (int32_t)src_len, status );
69
70 if( *status == U_ZERO_ERROR )
71 {
72 /* String is converted successfully */
73 (*target)[dst_len] = 0;
74 *target_len = dst_len;
75 return;
76 }
77
78 /* Bail out if an unexpected error occurred.
79 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
80 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
81 */
82 if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
83 return;
84
85 /* Allocate memory for the destination buffer (it will be zero-terminated). */
86 dst_buf = eumalloc( dst_len + 1 );
87
88 /* Convert source string from UTF-8 to UTF-16. */
89 *status = U_ZERO_ERROR;
90 u_strFromUTF8( dst_buf, dst_len+1, NULL, src, src_len, status );
91 if( U_FAILURE( *status ) )
92 {
93 efree( dst_buf );
94 return;
95 }
96
97 dst_buf[dst_len] = 0;
98
99 if( *target )
100 efree( *target );
101
102 *target = dst_buf;
103 *target_len = dst_len;
104 }
105 /* }}} */
106
107 /* {{{ intl_convert_utf16_to_utf8
108 * Convert given string from UTF-16 to UTF-8.
109 *
110 * @param source String to convert.
111 * @param source_len Length of the source string.
112 * @param status Conversion status.
113 *
114 * @return zend_string
115 */
intl_convert_utf16_to_utf8(const UChar * src,int32_t src_len,UErrorCode * status)116 zend_string* intl_convert_utf16_to_utf8(
117 const UChar* src, int32_t src_len,
118 UErrorCode* status )
119 {
120 zend_string* dst;
121 int32_t dst_len;
122
123 /* Determine required destination buffer size (pre-flighting). */
124 *status = U_ZERO_ERROR;
125 u_strToUTF8( NULL, 0, &dst_len, src, src_len, status );
126
127 /* Bail out if an unexpected error occurred.
128 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
129 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
130 */
131 if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
132 return NULL;
133
134 /* Allocate memory for the destination buffer (it will be zero-terminated). */
135 dst = zend_string_alloc(dst_len, 0);
136
137 /* Convert source string from UTF-8 to UTF-16. */
138 *status = U_ZERO_ERROR;
139 u_strToUTF8( ZSTR_VAL(dst), dst_len, NULL, src, src_len, status );
140 if( U_FAILURE( *status ) )
141 {
142 zend_string_efree(dst);
143 return NULL;
144 }
145
146 /* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
147 *status = U_ZERO_ERROR;
148
149 ZSTR_VAL(dst)[dst_len] = 0;
150 return dst;
151 }
152 /* }}} */
153