xref: /openssl/crypto/asn1/a_utf8.c (revision ba64e5a9)
1 /*
2  * Copyright 1995-2021 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <stdio.h>
11 #include "internal/cryptlib.h"
12 #include "internal/unicode.h"
13 #include <openssl/asn1.h>
14 
15 /* UTF8 utilities */
16 
/*-
 * Decode one UTF8 encoded character from the start of a buffer.
 *
 * 'str' points to the encoded bytes and 'len' is the number of bytes
 * available. On success the decoded value is stored in '*val' and the number
 * of bytes consumed (1 to 4) is returned. If 'len' is zero or negative,
 * nothing is decoded and 0 is returned. '*val' is only written on success.
 *
 * On failure a negative error code is returned:
 * -1 = string too short
 * -2 = illegal character (bad lead byte, surrogate, or value above 0x10FFFF)
 * -3 = subsequent characters not of the form 10xxxxxx
 * -4 = character encoded incorrectly (not minimal length).
 */

int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
{
    unsigned long value;
    int ret;

    if (len <= 0)
        return 0;

    /* Check syntax and work out the encoded value (if correct) */
    if ((str[0] & 0x80) == 0) {
        /* 0xxxxxxx */
        value = str[0] & 0x7f;
        ret = 1;
    } else if ((str[0] & 0xe0) == 0xc0) {
        /* 110xxxxx 10xxxxxx */
        if (len < 2)
            return -1;
        if ((str[1] & 0xc0) != 0x80)
            return -3;
        value = (unsigned long)(str[0] & 0x1f) << 6;
        value |= str[1] & 0x3f;
        /* Anything below 0x80 fits in one byte */
        if (value < 0x80)
            return -4;
        ret = 2;
    } else if ((str[0] & 0xf0) == 0xe0) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        if (len < 3)
            return -1;
        if ((str[1] & 0xc0) != 0x80
            || (str[2] & 0xc0) != 0x80)
            return -3;
        value = (unsigned long)(str[0] & 0xf) << 12;
        value |= (unsigned long)(str[1] & 0x3f) << 6;
        value |= str[2] & 0x3f;
        /* Anything below 0x800 fits in two bytes */
        if (value < 0x800)
            return -4;
        if (is_unicode_surrogate(value))
            return -2;
        ret = 3;
    } else if ((str[0] & 0xf8) == 0xf0) {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        if (len < 4)
            return -1;
        if ((str[1] & 0xc0) != 0x80
            || (str[2] & 0xc0) != 0x80
            || (str[3] & 0xc0) != 0x80)
            return -3;
        value = (unsigned long)(str[0] & 0x7) << 18;
        value |= (unsigned long)(str[1] & 0x3f) << 12;
        value |= (unsigned long)(str[2] & 0x3f) << 6;
        value |= str[3] & 0x3f;
        /* Anything below 0x10000 fits in three bytes */
        if (value < 0x10000)
            return -4;
        /*
         * Four bytes can carry 21 bits (up to 0x1FFFFF), but the Unicode
         * range ends at 0x10FFFF (RFC 3629), so larger values are illegal.
         * NOTE(review): UTF8_putc() refuses values >= UNICODE_LIMIT, which is
         * presumably this same bound; if so, prefer the macro here.
         */
        if (value > 0x10ffff)
            return -2;
        ret = 4;
    } else {
        /* Stray continuation byte or a lead byte for a longer sequence */
        return -2;
    }
    *val = value;
    return ret;
}
84 
85 /*
86  * This takes a character 'value' and writes the UTF8 encoded value in 'str'
87  * where 'str' is a buffer containing 'len' characters. Returns the number of
88  * characters written, -1 if 'len' is too small or -2 if 'value' is out of
89  * range. 'str' can be set to NULL in which case it just returns the number of
90  * characters. It will need at most 4 characters.
91  */
92 
UTF8_putc(unsigned char * str,int len,unsigned long value)93 int UTF8_putc(unsigned char *str, int len, unsigned long value)
94 {
95     if (!str)
96         len = 4;                /* Maximum we will need */
97     else if (len <= 0)
98         return -1;
99     if (value < 0x80) {
100         if (str)
101             *str = (unsigned char)value;
102         return 1;
103     }
104     if (value < 0x800) {
105         if (len < 2)
106             return -1;
107         if (str) {
108             *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
109             *str = (unsigned char)((value & 0x3f) | 0x80);
110         }
111         return 2;
112     }
113     if (value < 0x10000) {
114         if (is_unicode_surrogate(value))
115             return -2;
116         if (len < 3)
117             return -1;
118         if (str) {
119             *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
120             *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
121             *str = (unsigned char)((value & 0x3f) | 0x80);
122         }
123         return 3;
124     }
125     if (value < UNICODE_LIMIT) {
126         if (len < 4)
127             return -1;
128         if (str) {
129             *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
130             *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
131             *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
132             *str = (unsigned char)((value & 0x3f) | 0x80);
133         }
134         return 4;
135     }
136     return -2;
137 }
138