xref: /openssl/crypto/ctype.c (revision da1c088f)
1 /*
2  * Copyright 2017-2023 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <string.h>
11 #include <stdio.h>
12 #include "crypto/ctype.h"
13 #include <openssl/ebcdic.h>
14 
/*
 * Define the character classes for each character in the seven-bit ASCII
 * character set.  This is independent of the host's character set: characters
 * are converted to ASCII before being used as an index into this table.
 * Characters outside of the seven-bit ASCII range are detected before indexing.
 */
21 static const unsigned short ctype_char_map[128] = {
22    /* 00 nul */ CTYPE_MASK_cntrl,
23    /* 01 soh */ CTYPE_MASK_cntrl,
24    /* 02 stx */ CTYPE_MASK_cntrl,
25    /* 03 etx */ CTYPE_MASK_cntrl,
26    /* 04 eot */ CTYPE_MASK_cntrl,
27    /* 05 enq */ CTYPE_MASK_cntrl,
28    /* 06 ack */ CTYPE_MASK_cntrl,
29    /* 07 \a  */ CTYPE_MASK_cntrl,
30    /* 08 \b  */ CTYPE_MASK_cntrl,
31    /* 09 \t  */ CTYPE_MASK_blank | CTYPE_MASK_cntrl | CTYPE_MASK_space,
32    /* 0A \n  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
33    /* 0B \v  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
34    /* 0C \f  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
35    /* 0D \r  */ CTYPE_MASK_cntrl | CTYPE_MASK_space,
36    /* 0E so  */ CTYPE_MASK_cntrl,
37    /* 0F si  */ CTYPE_MASK_cntrl,
38    /* 10 dle */ CTYPE_MASK_cntrl,
39    /* 11 dc1 */ CTYPE_MASK_cntrl,
40    /* 12 dc2 */ CTYPE_MASK_cntrl,
41    /* 13 dc3 */ CTYPE_MASK_cntrl,
42    /* 14 dc4 */ CTYPE_MASK_cntrl,
43    /* 15 nak */ CTYPE_MASK_cntrl,
44    /* 16 syn */ CTYPE_MASK_cntrl,
45    /* 17 etb */ CTYPE_MASK_cntrl,
46    /* 18 can */ CTYPE_MASK_cntrl,
47    /* 19 em  */ CTYPE_MASK_cntrl,
48    /* 1A sub */ CTYPE_MASK_cntrl,
49    /* 1B esc */ CTYPE_MASK_cntrl,
50    /* 1C fs  */ CTYPE_MASK_cntrl,
51    /* 1D gs  */ CTYPE_MASK_cntrl,
52    /* 1E rs  */ CTYPE_MASK_cntrl,
53    /* 1F us  */ CTYPE_MASK_cntrl,
54    /* 20     */ CTYPE_MASK_blank | CTYPE_MASK_print | CTYPE_MASK_space
55                 | CTYPE_MASK_asn1print,
56    /* 21  !  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
57    /* 22  "  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
58    /* 23  #  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
59    /* 24  $  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
60    /* 25  %  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
61    /* 26  &  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
62    /* 27  '  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
63                 | CTYPE_MASK_asn1print,
64    /* 28  (  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
65                 | CTYPE_MASK_asn1print,
66    /* 29  )  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
67                 | CTYPE_MASK_asn1print,
68    /* 2A  *  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
69    /* 2B  +  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
70                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
71    /* 2C  ,  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
72                 | CTYPE_MASK_asn1print,
73    /* 2D  -  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
74                 | CTYPE_MASK_asn1print,
75    /* 2E  .  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
76                 | CTYPE_MASK_asn1print,
77    /* 2F  /  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
78                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
79    /* 30  0  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
80                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
81    /* 31  1  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
82                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
83    /* 32  2  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
84                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
85    /* 33  3  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
86                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
87    /* 34  4  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
88                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
89    /* 35  5  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
90                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
91    /* 36  6  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
92                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
93    /* 37  7  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
94                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
95    /* 38  8  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
96                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
97    /* 39  9  */ CTYPE_MASK_digit | CTYPE_MASK_graph | CTYPE_MASK_print
98                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
99    /* 3A  :  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
100                 | CTYPE_MASK_asn1print,
101    /* 3B  ;  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
102    /* 3C  <  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
103    /* 3D  =  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
104                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
105    /* 3E  >  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
106    /* 3F  ?  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct
107                 | CTYPE_MASK_asn1print,
108    /* 40  @  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
109    /* 41  A  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
110                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
111    /* 42  B  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
112                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
113    /* 43  C  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
114                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
115    /* 44  D  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
116                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
117    /* 45  E  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
118                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
119    /* 46  F  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
120                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
121    /* 47  G  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
122                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
123    /* 48  H  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
124                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
125    /* 49  I  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
126                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
127    /* 4A  J  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
128                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
129    /* 4B  K  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
130                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
131    /* 4C  L  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
132                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
133    /* 4D  M  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
134                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
135    /* 4E  N  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
136                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
137    /* 4F  O  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
138                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
139    /* 50  P  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
140                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
141    /* 51  Q  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
142                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
143    /* 52  R  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
144                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
145    /* 53  S  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
146                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
147    /* 54  T  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
148                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
149    /* 55  U  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
150                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
151    /* 56  V  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
152                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
153    /* 57  W  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
154                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
155    /* 58  X  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
156                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
157    /* 59  Y  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
158                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
159    /* 5A  Z  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_upper
160                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
161    /* 5B  [  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
162    /* 5C  \  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
163    /* 5D  ]  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
164    /* 5E  ^  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
165    /* 5F  _  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
166    /* 60  `  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
167    /* 61  a  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
168                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
169    /* 62  b  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
170                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
171    /* 63  c  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
172                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
173    /* 64  d  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
174                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
175    /* 65  e  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
176                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
177    /* 66  f  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
178                 | CTYPE_MASK_xdigit | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
179    /* 67  g  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
180                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
181    /* 68  h  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
182                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
183    /* 69  i  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
184                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
185    /* 6A  j  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
186                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
187    /* 6B  k  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
188                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
189    /* 6C  l  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
190                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
191    /* 6D  m  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
192                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
193    /* 6E  n  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
194                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
195    /* 6F  o  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
196                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
197    /* 70  p  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
198                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
199    /* 71  q  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
200                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
201    /* 72  r  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
202                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
203    /* 73  s  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
204                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
205    /* 74  t  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
206                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
207    /* 75  u  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
208                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
209    /* 76  v  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
210                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
211    /* 77  w  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
212                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
213    /* 78  x  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
214                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
215    /* 79  y  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
216                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
217    /* 7A  z  */ CTYPE_MASK_graph | CTYPE_MASK_lower | CTYPE_MASK_print
218                 | CTYPE_MASK_base64 | CTYPE_MASK_asn1print,
219    /* 7B  {  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
220    /* 7C  |  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
221    /* 7D  }  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
222    /* 7E  ~  */ CTYPE_MASK_graph | CTYPE_MASK_print | CTYPE_MASK_punct,
223    /* 7F del */ CTYPE_MASK_cntrl
224 };
225 
226 #ifdef CHARSET_EBCDIC
ossl_toascii(int c)227 int ossl_toascii(int c)
228 {
229     if (c < -128 || c > 256 || c == EOF)
230         return c;
231     /*
232      * Adjust negatively signed characters.
233      * This is not required for ASCII because any character that sign extends
234      * is not seven bit and all of the checks are on the seven bit characters.
235      * I.e. any check must fail on sign extension.
236      */
237     if (c < 0)
238         c += 256;
239     return os_toascii[c];
240 }
241 
ossl_fromascii(int c)242 int ossl_fromascii(int c)
243 {
244     if (c < -128 || c > 256 || c == EOF)
245         return c;
246     if (c < 0)
247         c += 256;
248     return os_toebcdic[c];
249 }
250 #endif
251 
ossl_ctype_check(int c,unsigned int mask)252 int ossl_ctype_check(int c, unsigned int mask)
253 {
254     const int max = sizeof(ctype_char_map) / sizeof(*ctype_char_map);
255     const int a = ossl_toascii(c);
256 
257     return a >= 0 && a < max && (ctype_char_map[a] & mask) != 0;
258 }
259 
260 /*
261  * Implement some of the simpler functions directly to avoid the overhead of
262  * accessing memory via ctype_char_map[].
263  */
264 
/*
 * Range tests on an ASCII code point.  Each evaluates to 1 or 0.
 * The argument is parenthesised so that an expression argument is compared
 * as a whole; note that it is evaluated twice, so it must not have side
 * effects.
 */
#define ASCII_IS_DIGIT(c)   ((c) >= 0x30 && (c) <= 0x39)
#define ASCII_IS_UPPER(c)   ((c) >= 0x41 && (c) <= 0x5A)
#define ASCII_IS_LOWER(c)   ((c) >= 0x61 && (c) <= 0x7A)
268 
/*
 * Return 1 if c, once converted with ossl_toascii(), is one of the ASCII
 * decimal digits 0x30 .. 0x39, and 0 otherwise.
 */
int ossl_isdigit(int c)
{
    const int ascii = ossl_toascii(c);

    if (ASCII_IS_DIGIT(ascii))
        return 1;
    return 0;
}
275 
/*
 * Return 1 if c, once converted with ossl_toascii(), is one of the ASCII
 * upper case letters 0x41 .. 0x5A, and 0 otherwise.
 */
int ossl_isupper(int c)
{
    const int ascii = ossl_toascii(c);

    if (ASCII_IS_UPPER(ascii))
        return 1;
    return 0;
}
282 
/*
 * Return 1 if c, once converted with ossl_toascii(), is one of the ASCII
 * lower case letters 0x61 .. 0x7A, and 0 otherwise.
 */
int ossl_islower(int c)
{
    const int ascii = ossl_toascii(c);

    if (ASCII_IS_LOWER(ascii))
        return 1;
    return 0;
}
289 
/*
 * Value XORed with a letter by ossl_tolower() and ossl_toupper() to switch
 * its case.  It is 0x40 when CHARSET_EBCDIC is defined without
 * CHARSET_EBCDIC_TEST, and 0x20 in every other configuration.
 */
#if !defined(CHARSET_EBCDIC) || defined(CHARSET_EBCDIC_TEST)
static const int case_change = 0x20;
#else
static const int case_change = 0x40;
#endif
295 
ossl_tolower(int c)296 int ossl_tolower(int c)
297 {
298     int a = ossl_toascii(c);
299 
300     return ASCII_IS_UPPER(a) ? c ^ case_change : c;
301 }
302 
ossl_toupper(int c)303 int ossl_toupper(int c)
304 {
305     int a = ossl_toascii(c);
306 
307     return ASCII_IS_LOWER(a) ? c ^ case_change : c;
308 }
309 
/*
 * Return 1 if c is one of the ASCII decimal digits 0x30 .. 0x39, and 0
 * otherwise.  Unlike ossl_isdigit(), c is tested as given, with no
 * ossl_toascii() conversion applied first.
 */
int ossl_ascii_isdigit(int c)
{
    if (ASCII_IS_DIGIT(c))
        return 1;
    return 0;
}
314