xref: /php-src/ext/intl/intl_convert.c (revision c3dda473)
1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
12    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
13    +----------------------------------------------------------------------+
14  */
15 
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include <php.h>
21 
22 #include "intl_common.h"
23 #include "intl_convert.h"
24 
25 /* {{{ intl_convert_utf8_to_utf16
26  * Convert given string from UTF-8 to UTF-16 to *target buffer.
27  *
28  * It *target is NULL then we allocate a large enough buffer,
29  * store the converted string into it, and make target point to it.
30  *
31  * Otherwise, if *target is non-NULL, we assume that it points to a
32  * dynamically allocated buffer of *target_len bytes length.
33  * In this case the buffer will be used to store the converted string to,
34  * and may be resized (made larger) if needed.
35  *
36  * Note that ICU uses int32_t as string length and PHP uses size_t. While
37  * it is not likely in practical situations to have strings longer than
38  * INT32_MAX, these are different types and need to be handled carefully.
39  *
40  * @param target      Where to place the result.
41  * @param target_len  Result length.
42  * @param source      String to convert.
43  * @param source_len  Length of the source string.
44  * @param status      Conversion status.
45  *
46  * @return void       This function does not return anything.
47  */
intl_convert_utf8_to_utf16(UChar ** target,int32_t * target_len,const char * src,size_t src_len,UErrorCode * status)48 void intl_convert_utf8_to_utf16(
49 	UChar**     target, int32_t* target_len,
50 	const char* src,    size_t  src_len,
51 	UErrorCode* status )
52 {
53 	UChar*      dst_buf = NULL;
54 	int32_t     dst_len = 0;
55 
56 	/* If *target is NULL determine required destination buffer size (pre-flighting).
57 	 * Otherwise, attempt to convert source string; if *target buffer is not large enough
58 	 * it will be resized appropriately.
59 	 */
60 	*status = U_ZERO_ERROR;
61 
62 	if(src_len > INT32_MAX) {
63 		/* we cannot fit this string */
64 		*status = U_BUFFER_OVERFLOW_ERROR;
65 		return;
66 	}
67 
68 	u_strFromUTF8( *target, *target_len, &dst_len, src, (int32_t)src_len, status );
69 
70 	if( *status == U_ZERO_ERROR )
71 	{
72 		/* String is converted successfully */
73 		(*target)[dst_len] = 0;
74 		*target_len = dst_len;
75 		return;
76 	}
77 
78 	/* Bail out if an unexpected error occurred.
79 	 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
80 	 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
81 	 */
82 	if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
83 		return;
84 
85 	/* Allocate memory for the destination buffer (it will be zero-terminated). */
86 	dst_buf = eumalloc( dst_len + 1 );
87 
88 	/* Convert source string from UTF-8 to UTF-16. */
89 	*status = U_ZERO_ERROR;
90 	u_strFromUTF8( dst_buf, dst_len+1, NULL, src, src_len, status );
91 	if( U_FAILURE( *status ) )
92 	{
93 		efree( dst_buf );
94 		return;
95 	}
96 
97 	dst_buf[dst_len] = 0;
98 
99 	if( *target )
100 		efree( *target );
101 
102 	*target     = dst_buf;
103 	*target_len = dst_len;
104 }
105 /* }}} */
106 
107 /* {{{ intl_convert_utf16_to_utf8
108  * Convert given string from UTF-16 to UTF-8.
109  *
110  * @param source      String to convert.
111  * @param source_len  Length of the source string.
112  * @param status      Conversion status.
113  *
114  * @return zend_string
115  */
intl_convert_utf16_to_utf8(const UChar * src,int32_t src_len,UErrorCode * status)116 zend_string* intl_convert_utf16_to_utf8(
117 	const UChar* src,    int32_t  src_len,
118 	UErrorCode*  status )
119 {
120 	zend_string* dst;
121 	int32_t      dst_len;
122 
123 	/* Determine required destination buffer size (pre-flighting). */
124 	*status = U_ZERO_ERROR;
125 	u_strToUTF8( NULL, 0, &dst_len, src, src_len, status );
126 
127 	/* Bail out if an unexpected error occurred.
128 	 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
129 	 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string is empty).
130 	 */
131 	if( *status != U_BUFFER_OVERFLOW_ERROR && *status != U_STRING_NOT_TERMINATED_WARNING )
132 		return NULL;
133 
134 	/* Allocate memory for the destination buffer (it will be zero-terminated). */
135 	dst = zend_string_alloc(dst_len, 0);
136 
137 	/* Convert source string from UTF-8 to UTF-16. */
138 	*status = U_ZERO_ERROR;
139 	u_strToUTF8( ZSTR_VAL(dst), dst_len, NULL, src, src_len, status );
140 	if( U_FAILURE( *status ) )
141 	{
142 		zend_string_efree(dst);
143 		return NULL;
144 	}
145 
146 	/* U_STRING_NOT_TERMINATED_WARNING is OK for us => reset 'status'. */
147 	*status = U_ZERO_ERROR;
148 
149 	ZSTR_VAL(dst)[dst_len] = 0;
150 	return dst;
151 }
152 /* }}} */
153