xref: /openssl/apps/lib/win32_init.c (revision fbd2ece1)
1 /*
2  * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <windows.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <malloc.h>
14 
15 #if defined(CP_UTF8)
16 
17 static UINT saved_cp;
18 static int newargc;
19 static char **newargv;
20 
/*
 * Exit-time handler: put the console output code page back to the value
 * stored in |saved_cp| and release every string held in |newargv| as well
 * as the vector itself.
 */
static void cleanup(void)
{
    int idx = 0;

    SetConsoleOutputCP(saved_cp);

    while (idx < newargc) {
        free(newargv[idx]);
        idx++;
    }

    free(newargv);
}
32 
33 /*
34  * Incrementally [re]allocate newargv and keep it NULL-terminated.
35  */
validate_argv(int argc)36 static int validate_argv(int argc)
37 {
38     static int size = 0;
39 
40     if (argc >= size) {
41         char **ptr;
42 
43         while (argc >= size)
44             size += 64;
45 
46         ptr = realloc(newargv, size * sizeof(newargv[0]));
47         if (ptr == NULL)
48             return 0;
49 
50         (newargv = ptr)[argc] = NULL;
51     } else {
52         newargv[argc] = NULL;
53     }
54 
55     return 1;
56 }
57 
/*
 * Expand a wildcard argument into the matching file names.
 *
 * |wstr| points at |wlen| wide characters of one parsed argument; the
 * element at wstr[wlen] must be writable because it is temporarily
 * overwritten with a terminator for the FindFirstFileW() call and then
 * restored.
 *
 * Each match (except "." and "..") is converted to UTF-8, prefixed with
 * the directory part of the pattern, and appended to newargv.
 *
 * Returns 0 if the argument has no '*' or '?' in its file name part, or if
 * FindFirstFileW() finds nothing; the caller is then expected to pass the
 * argument through verbatim.  Returns 1 once the search has started, even
 * if an allocation failure cut the expansion short.
 */
static int process_glob(WCHAR *wstr, int wlen)
{
    int i, slash, udlen;
    WCHAR saved_char;
    WIN32_FIND_DATAW data;
    HANDLE h;

    /*
     * Note that we support wildcard characters only in filename part
     * of the path, and not in directories. Windows users are used to
     * this, that's why recursive glob processing is not implemented.
     */
    /*
     * Start by looking for last slash or backslash, ...
     */
    for (slash = 0, i = 0; i < wlen; i++)
        if (wstr[i] == L'/' || wstr[i] == L'\\')
            slash = i + 1;
    /*
     * ... then look for asterisk or question mark in the file name.
     */
    for (i = slash; i < wlen; i++)
        if (wstr[i] == L'*' || wstr[i] == L'?')
            break;

    if (i == wlen)
        return 0;   /* definitely not a glob */

    /* FindFirstFileW() needs a terminated string; patch and restore. */
    saved_char = wstr[wlen];
    wstr[wlen] = L'\0';
    h = FindFirstFileW(wstr, &data);
    wstr[wlen] = saved_char;
    if (h == INVALID_HANDLE_VALUE)
        return 0;   /* not a valid glob, just pass... */

    /* UTF-8 length of the directory prefix (up to and including slash). */
    if (slash)
        udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                    NULL, 0, NULL, NULL);
    else
        udlen = 0;

    do {
        int uflen;
        char *arg;

        /*
         * skip over . and ..
         */
        if (data.cFileName[0] == L'.') {
            if ((data.cFileName[1] == L'\0') ||
                (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
                continue;
        }

        if (!validate_argv(newargc + 1))
            break;

        /*
         * -1 below means "scan for trailing '\0' *and* count it",
         * so that |uflen| covers even trailing '\0'.
         */
        uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                                    NULL, 0, NULL, NULL);

        arg = malloc(udlen + uflen);
        if (arg == NULL)
            break;

        if (udlen)
            WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                arg, udlen, NULL, NULL);

        WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                            arg + udlen, uflen, NULL, NULL);

        newargv[newargc++] = arg;
    } while (FindNextFileW(h, &data));

    /*
     * Search handles returned by FindFirstFileW() must be released with
     * FindClose(), not CloseHandle().
     */
    FindClose(h);

    return 1;
}
140 
/*
 * Rebuild the program's argument vector as UTF-8 strings.
 *
 * Nothing happens unless GetEnvironmentVariableW() reports a non-zero
 * size for OPENSSL_WIN32_UTF8.  Otherwise the wide command line from
 * GetCommandLineW() is split into arguments following the Microsoft
 * quoting/backslash rules, wildcard arguments are expanded through
 * process_glob(), and every argument is converted to UTF-8.
 *
 * On success |*argc| and |*argv| are replaced with the new vector, the
 * console output code page is switched to CP_UTF8 (the previous one is
 * kept in |saved_cp|), and cleanup() is registered with atexit().
 * If an allocation fails, everything built so far is freed and |*argc|
 * and |*argv| are left untouched.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    const WCHAR *wcmdline;
    WCHAR *warg, *wend, *p;
    int wlen, ulen, valid = 1;
    char *arg;

    if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0)
        return;

    /*
     * Fetch the command line before allocating anything, so that bailing
     * out here cannot leak a freshly allocated argument vector.
     */
    wcmdline = GetCommandLineW();
    if (wcmdline == NULL)
        return;

    newargc = 0;
    newargv = NULL;
    if (!validate_argv(newargc))
        return;

    /*
     * make a copy of the command line, since we might have to modify it...
     */
    wlen = (int)wcslen(wcmdline);
    p = _alloca((wlen + 1) * sizeof(WCHAR));
    wcscpy(p, wcmdline);

    while (*p != L'\0') {
        int in_quote = 0;

        if (*p == L' ' || *p == L'\t') {
            p++; /* skip over whitespace */
            continue;
        }

        /*
         * Note: because we may need to fiddle with the number of backslashes,
         * the argument string is copied into itself.  This is safe because
         * the number of characters will never expand.
         */
        warg = wend = p;
        while (*p != L'\0'
               && (in_quote || (*p != L' ' && *p != L'\t'))) {
            switch (*p) {
            case L'\\':
                /*
                 * Microsoft documentation on how backslashes are treated
                 * is:
                 *
                 * + Backslashes are interpreted literally, unless they
                 *   immediately precede a double quotation mark.
                 * + If an even number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is interpreted as a string delimiter.
                 * + If an odd number of backslashes is followed by a double
                 *   quotation mark, one backslash is placed in the argv array
                 *   for every pair of backslashes, and the double quotation
                 *   mark is "escaped" by the remaining backslash, causing a
                 *   literal double quotation mark (") to be placed in argv.
                 *
                 * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
                 *
                 * Though referred page doesn't mention it, multiple double
                 * quotes are also special. Pair of double quotes in quoted
                 * string is counted as single double quote.
                 */
                {
                    const WCHAR *q = p;
                    int i;

                    while (*p == L'\\')
                        p++;

                    if (*p == L'"') {
                        for (i = (p - q) / 2; i > 0; i--)
                            *wend++ = L'\\';

                        /*
                         * if odd amount of backslashes before the quote,
                         * said quote is part of the argument, not a delimiter
                         */
                        if ((p - q) % 2 == 1)
                            *wend++ = *p++;
                    } else {
                        for (i = p - q; i > 0; i--)
                            *wend++ = L'\\';
                    }
                }
                break;
            case L'"':
                /*
                 * Without the preceding backslash (or when preceded with an
                 * even number of backslashes), the double quote is a simple
                 * string delimiter and just slightly change the parsing state
                 */
                if (in_quote && p[1] == L'"')
                    *wend++ = *p++;
                else
                    in_quote = !in_quote;
                p++;
                break;
            default:
                /*
                 * Any other non-delimiter character is just taken verbatim
                 */
                *wend++ = *p++;
                break;
            }
        }

        wlen = wend - warg;

        /*
         * Empty arguments and arguments that process_glob() declines are
         * converted and appended verbatim.
         */
        if (wlen == 0 || !process_glob(warg, wlen)) {
            if (!validate_argv(newargc + 1)) {
                valid = 0;
                break;
            }

            ulen = 0;
            if (wlen > 0) {
                ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                           NULL, 0, NULL, NULL);
                /* an argument that cannot be converted is dropped */
                if (ulen <= 0)
                    continue;
            }

            arg = malloc(ulen + 1);
            if (arg == NULL) {
                valid = 0;
                break;
            }

            if (wlen > 0)
                WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
                                    arg, ulen, NULL, NULL);
            arg[ulen] = '\0';

            newargv[newargc++] = arg;
        }
    }

    if (valid) {
        saved_cp = GetConsoleOutputCP();
        SetConsoleOutputCP(CP_UTF8);

        *argc = newargc;
        *argv = newargv;

        atexit(cleanup);
    } else if (newargv != NULL) {
        int i;

        /* allocation failure: discard the partially built vector */
        for (i = 0; i < newargc; i++)
            free(newargv[i]);

        free(newargv);

        newargc = 0;
        newargv = NULL;
    }

    return;
}
304 #else
/*
 * Fallback used when CP_UTF8 is not defined: the arguments are left
 * exactly as the caller passed them.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
307 #endif
308