xref: /PHP-7.4/win32/codepage.h (revision 92ac598a)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) The PHP Group                                          |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Anatol Belski <ab@php.net>                                   |
16    +----------------------------------------------------------------------+
17 */
18 
19 #ifndef PHP_WIN32_CODEPAGE_H
20 #define PHP_WIN32_CODEPAGE_H
21 
22 #ifdef __cplusplus
23 extern "C" {
24 #endif
25 
/* PW32CP is the linkage decoration put on every function declared in this
   header: the symbols are exported when PHP_EXPORTS is defined and imported
   otherwise. */
#ifdef PHP_EXPORTS
# define PW32CP __declspec(dllexport)
#else
# define PW32CP __declspec(dllimport)
#endif
31 
/* Sentinel arguments used by the single-argument convenience macros of this
   header: PHP_WIN32_CP_IGNORE_LEN is passed as the in_len argument and
   PHP_WIN32_CP_IGNORE_LEN_P as the out_len argument of the conversion
   functions. NOTE(review): presumably they make the conversion rely on the
   \0 terminator and skip reporting the output length - confirm against the
   implementation. */
#define PHP_WIN32_CP_IGNORE_LEN (0)
#define PHP_WIN32_CP_IGNORE_LEN_P ((size_t *)-1)
34 
35 struct php_win32_cp {
36 	DWORD id;
37 	DWORD to_w_fl;
38 	DWORD from_w_fl;
39 	DWORD char_size;
40 	char *name;
41 	char *enc;
42 	char *desc;
43 };
44 
45 PW32CP BOOL php_win32_cp_use_unicode(void);
46 PW32CP const struct php_win32_cp *php_win32_cp_do_setup(const char *);
47 #define php_win32_cp_setup() php_win32_cp_do_setup(NULL)
48 PW32CP const struct php_win32_cp *php_win32_cp_do_update(const char *);
49 #define php_win32_cp_update() php_win32_cp_do_update(NULL)
50 PW32CP const struct php_win32_cp *php_win32_cp_shutdown(void);
51 PW32CP const struct php_win32_cp *php_win32_cp_get_current(void);
52 PW32CP const struct php_win32_cp *php_win32_cp_get_orig(void);
53 PW32CP const struct php_win32_cp *php_win32_cp_get_by_id(DWORD id);
54 PW32CP const struct php_win32_cp *php_win32_cp_set_by_id(DWORD id);
55 PW32CP const struct php_win32_cp *php_win32_cp_get_by_enc(const char *enc);
56 PW32CP const struct php_win32_cp *php_win32_cp_cli_do_setup(DWORD);
57 #define php_win32_cp_cli_setup() php_win32_cp_cli_do_setup(0)
58 #define php_win32_cp_cli_update() php_win32_cp_cli_do_setup(0)
59 PW32CP const struct php_win32_cp *php_win32_cp_cli_do_restore(DWORD);
60 #define php_win32_cp_cli_restore() php_win32_cp_cli_do_restore(0)
61 
62 /* This API is binary safe and expects a \0 terminated input.
63    The returned out is \0 terminated, but the length doesn't count \0. */
64 PW32CP wchar_t *php_win32_cp_conv_to_w(DWORD in_cp, DWORD flags, const char* in, size_t in_len, size_t *out_len);
65 PW32CP wchar_t *php_win32_cp_conv_utf8_to_w(const char* in, size_t in_len, size_t *out_len);
66 #define php_win32_cp_utf8_to_w(in) php_win32_cp_conv_utf8_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
67 PW32CP wchar_t *php_win32_cp_conv_cur_to_w(const char* in, size_t in_len, size_t *out_len);
68 #define php_win32_cp_cur_to_w(in) php_win32_cp_conv_cur_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
69 PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size_t *out_len);
70 #define php_win32_cp_ascii_to_w(in) php_win32_cp_conv_ascii_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
71 PW32CP char *php_win32_cp_conv_from_w(DWORD out_cp, DWORD flags, const wchar_t* in, size_t in_len, size_t *out_len);
72 PW32CP char *php_win32_cp_conv_w_to_utf8(const wchar_t* in, size_t in_len, size_t *out_len);
73 #define php_win32_cp_w_to_utf8(in) php_win32_cp_conv_w_to_utf8(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
74 PW32CP char *php_win32_cp_conv_w_to_cur(const wchar_t* in, size_t in_len, size_t *out_len);
75 #define php_win32_cp_w_to_cur(in) php_win32_cp_conv_w_to_cur(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
76 PW32CP wchar_t *php_win32_cp_env_any_to_w(const char* env);
77 
78 /* This function tries to make the best guess to convert any
79    given string to a wide char, also preferring the fastest code
80    path to unicode. It returns NULL on fail. */
php_win32_cp_conv_any_to_w(const char * in,size_t in_len,size_t * out_len)81 __forceinline static wchar_t *php_win32_cp_conv_any_to_w(const char* in, size_t in_len, size_t *out_len)
82 {/*{{{*/
83 	wchar_t *ret = NULL;
84 
85 	if (php_win32_cp_use_unicode()) {
86 		/* First try the pure ascii conversion. This is the fastest way to do the
87 			thing. Only applicable if the source string is UTF-8 in general.
88 			While it could possibly be ok with European encodings, usage with
89 			Asian encodings can cause unintended side effects. Lookup the term
90 			"mojibake" if need more. */
91 		ret = php_win32_cp_conv_ascii_to_w(in, in_len, out_len);
92 
93 		/* If that failed, try to convert to multibyte. */
94 		if (!ret) {
95 			ret = php_win32_cp_conv_utf8_to_w(in, in_len, out_len);
96 
97 			/* Still need this fallback with regard to possible broken data
98 				in the existing scripts. Broken data might be hardcoded in
99 				the user scripts, as UTF-8 settings was de facto ignored in
100 				older PHP versions. The fallback can be removed later for
101 				the sake of purity, keep now for BC reasons. */
102 			if (!ret) {
103 				const struct php_win32_cp *acp = php_win32_cp_get_by_id(GetACP());
104 
105 				if (acp) {
106 					ret = php_win32_cp_conv_to_w(acp->id, acp->to_w_fl, in, in_len, out_len);
107 				}
108 			}
109 		}
110 	} else {
111 		/* No unicode, convert from the current thread cp. */
112 		ret = php_win32_cp_conv_cur_to_w(in, in_len, out_len);
113 	}
114 
115 	return ret;
116 }/*}}}*/
117 #define php_win32_cp_any_to_w(in) php_win32_cp_conv_any_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
118 
119 /* This function converts from unicode function output back to PHP. If
120 	the PHP's current charset is not compatible with unicode, so the currently
121 	configured CP will be used. */
php_win32_cp_conv_w_to_any(const wchar_t * in,size_t in_len,size_t * out_len)122 __forceinline static char *php_win32_cp_conv_w_to_any(const wchar_t* in, size_t in_len, size_t *out_len)
123 {/*{{{*/
124 	return php_win32_cp_conv_w_to_cur(in, in_len, out_len);
125 }/*}}}*/
126 #define php_win32_cp_w_to_any(in) php_win32_cp_conv_w_to_any(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
127 
/* Converts the aw_len wide strings of the array aw into a newly malloc()ed
   array aa, converting every element with php_win32_cp_w_to_any().
   aa_len is always set to aw_len. If the array allocation fails, aa is left
   NULL and nothing is converted, so aa has to be checked before it is used.
   The elements are stored exactly as the conversion returned them and are
   not checked here.
   All arguments are parenthesized in the expansion, so expressions such as
   n + 1 are safe to pass; aw_len is evaluated more than once and must not
   have side effects. The loop counter has a reserved-looking name so that it
   cannot capture a variable named in the arguments.
   The expansion deliberately still ends with a semicolon, as call sites
   written without one would stop compiling otherwise; as a consequence the
   macro must not be used as the unbraced body of an if that has an else. */
#define PHP_WIN32_CP_W_TO_ANY_ARRAY(aw, aw_len, aa, aa_len) do { \
	int php_win32_cp_idx_; \
	(aa_len) = (aw_len); \
	(aa) = (char **) malloc((aw_len) * sizeof(char *)); \
	if (!(aa)) { \
		break; \
	} \
	for (php_win32_cp_idx_ = 0; php_win32_cp_idx_ < (aw_len); php_win32_cp_idx_++) { \
		(aa)[php_win32_cp_idx_] = php_win32_cp_w_to_any((aw)[php_win32_cp_idx_]); \
	} \
} while (0);
139 
140 
/* Releases an array of a_len strings: free() is called on every element and
   then on the array itself. A NULL array is accepted and ignored, whatever
   a_len says; this is the state PHP_WIN32_CP_W_TO_ANY_ARRAY leaves behind
   when its allocation fails. NULL elements are fine too, as free(NULL) is a
   no-op. The array variable is not reset, it dangles after the call.
   a and a_len are parenthesized in the expansion and evaluated more than
   once, so they must not have side effects. The loop counter has a
   reserved-looking name so that it cannot capture a variable named in the
   arguments.
   The expansion deliberately still ends with a semicolon, as call sites
   written without one would stop compiling otherwise; as a consequence the
   macro must not be used as the unbraced body of an if that has an else. */
#define PHP_WIN32_CP_FREE_ARRAY(a, a_len) do { \
	int php_win32_cp_idx_; \
	if (a) { \
		for (php_win32_cp_idx_ = 0; php_win32_cp_idx_ < (a_len); php_win32_cp_idx_++) { \
			free((a)[php_win32_cp_idx_]); \
		} \
		free(a); \
	} \
} while (0);
148 
149 #ifdef __cplusplus
150 }
151 #endif
152 
153 #endif /* PHP_WIN32_CODEPAGE_H */
154