xref: /PHP-7.4/ext/intl/intl_convert.c (revision 92ac598a)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
14    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
15    +----------------------------------------------------------------------+
16  */
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <php.h>
23 
24 #include "intl_common.h"
25 #include "intl_convert.h"
26 
27 /* {{{ intl_convert_utf8_to_utf16
28  * Convert given string from UTF-8 to UTF-16 to *target buffer.
29  *
30  * It *target is NULL then we allocate a large enough buffer,
31  * store the converted string into it, and make target point to it.
32  *
33  * Otherwise, if *target is non-NULL, we assume that it points to a
34  * dynamically allocated buffer of *target_len bytes length.
35  * In this case the buffer will be used to store the converted string to,
36  * and may be resized (made larger) if needed.
37  *
38  * Note that ICU uses int32_t as string length and PHP uses size_t. While
39  * it is not likely in practical situations to have strings longer than
40  * INT32_MAX, these are different types and need to be handled carefully.
41  *
42  * @param target      Where to place the result.
43  * @param target_len  Result length.
44  * @param source      String to convert.
45  * @param source_len  Length of the source string.
46  * @param status      Conversion status.
47  *
48  * @return void       This function does not return anything.
49  */
intl_convert_utf8_to_utf16(UChar ** target,int32_t * target_len,const char * src,size_t src_len,UErrorCode * status)50 void intl_convert_utf8_to_utf16(
51 	UChar**     target, int32_t* target_len,
52 	const char* src,    size_t  src_len,
53 	UErrorCode* status )
54 {
55 	UChar*      dst_buf = NULL;
56 	int32_t     dst_len = 0;
57 
58 	/* If *target is NULL determine required destination buffer size (pre-flighting).
59 	 * Otherwise, attempt to convert source string; if *target buffer is not large enough
60 	 * it will be resized appropriately.
61 	 */
62 	*status = U_ZERO_ERROR;
63 
64 	if(src_len > INT32_MAX) {
65 		/* we can not fit this string */
66 		*status = U_BUFFER_OVERFLOW_ERROR;
67 		return;
68 	}
69 
70 	u_strFromUTF8( *target, *target_len, &dst_len, src, (int32_t)src_len, status );
71 
72 	if( *status == U_ZERO_ERROR )
73 	{
74 		/* String is converted successfully */
75 		(*target)[dst_len] = 0;
76 		*target_len = dst_len;
77 		return;
78 	}
79 
80 	/* Bail out if an unexpected error occurred.
81 	 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
82 	 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
83 	 */
84 	if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
85 		return;
86 
87 	/* Allocate memory for the destination buffer (it will be zero-terminated). */
88 	dst_buf = eumalloc( dst_len + 1 );
89 
90 	/* Convert source string from UTF-8 to UTF-16. */
91 	*status = U_ZERO_ERROR;
92 	u_strFromUTF8( dst_buf, dst_len+1, NULL, src, src_len, status );
93 	if( U_FAILURE( *status ) )
94 	{
95 		efree( dst_buf );
96 		return;
97 	}
98 
99 	dst_buf[dst_len] = 0;
100 
101 	if( *target )
102 		efree( *target );
103 
104 	*target     = dst_buf;
105 	*target_len = dst_len;
106 }
107 /* }}} */
108 
109 /* {{{ intl_convert_utf16_to_utf8
110  * Convert given string from UTF-16 to UTF-8.
111  *
112  * @param source      String to convert.
113  * @param source_len  Length of the source string.
114  * @param status      Conversion status.
115  *
116  * @return zend_string
117  */
intl_convert_utf16_to_utf8(const UChar * src,int32_t src_len,UErrorCode * status)118 zend_string* intl_convert_utf16_to_utf8(
119 	const UChar* src,    int32_t  src_len,
120 	UErrorCode*  status )
121 {
122 	zend_string* dst;
123 	int32_t      dst_len;
124 
125 	/* Determine required destination buffer size (pre-flighting). */
126 	*status = U_ZERO_ERROR;
127 	u_strToUTF8( NULL, 0, &dst_len, src, src_len, status );
128 
129 	/* Bail out if an unexpected error occurred.
130 	 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
131 	 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
132 	 */
133 	if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
134 		return NULL;
135 
136 	/* Allocate memory for the destination buffer (it will be zero-terminated). */
137 	dst = zend_string_alloc(dst_len, 0);
138 
139 	/* Convert source string from UTF-8 to UTF-16. */
140 	*status = U_ZERO_ERROR;
141 	u_strToUTF8( ZSTR_VAL(dst), dst_len, NULL, src, src_len, status );
142 	if( U_FAILURE( *status ) )
143 	{
144 		zend_string_efree(dst);
145 		return NULL;
146 	}
147 
148 	/* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
149 	*status = U_ZERO_ERROR;
150 
151 	ZSTR_VAL(dst)[dst_len] = 0;
152 	return dst;
153 }
154 /* }}} */
155