xref: /PHP-7.4/ext/pcre/pcre2lib/pcre2_internal.h (revision 9f2d0395)
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2020 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
40e623e535SAndrei Zmievski 
41703e0370SAnatol Belski #ifndef PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
42703e0370SAnatol Belski #define PCRE2_INTERNAL_H_IDEMPOTENT_GUARD
43703e0370SAnatol Belski 
/* EBCDIC and Unicode support cannot be enabled at the same time. The
"configure" script prevents both being selected, but not everybody uses
"configure", so the combination is rejected here as well. EBCDIC is only
supported for the 8-bit library, but the check for that has to be later in this
file, because the first part is not width-dependent, and is included by
pcre2test.c with CODE_UNIT_WIDTH == 0. */

#if defined(EBCDIC) && defined(SUPPORT_UNICODE)
#error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
#endif
53276c5de0SAnatoliy Belsky 
54a5bc5aedSAnatol Belski /* Standard C headers */
55e623e535SAndrei Zmievski 
56e623e535SAndrei Zmievski #include <ctype.h>
57e623e535SAndrei Zmievski #include <limits.h>
58e623e535SAndrei Zmievski #include <stddef.h>
59e623e535SAndrei Zmievski #include <stdio.h>
60e623e535SAndrei Zmievski #include <stdlib.h>
61e623e535SAndrei Zmievski #include <string.h>
62e623e535SAndrei Zmievski 
/* A readable boolean type and its two values. FALSE and TRUE are defined only
when FALSE is not already a macro, which pacifies compiler warnings in
environments where these macros are defined elsewhere. Unfortunately, there is
no way to do the same for the typedef. */

typedef int BOOL;

#if !defined(FALSE)
#define FALSE   0
#define TRUE    1
#endif
72a5bc5aedSAnatol Belski 
73357ab3cbSAnatol Belski /* Valgrind (memcheck) support */
74357ab3cbSAnatol Belski 
75357ab3cbSAnatol Belski #ifdef SUPPORT_VALGRIND
76357ab3cbSAnatol Belski #include <valgrind/memcheck.h>
77357ab3cbSAnatol Belski #endif
78357ab3cbSAnatol Belski 
/* The -ftrivial-auto-var-init compiler option initializes all local variables
to avoid some classes of bug, but this can cause an unacceptable slowdown for
large on-stack arrays in hot functions. PCRE2_KEEP_UNINITIALIZED lets us
annotate such arrays: it expands to the "uninitialized" attribute when
HAVE_ATTRIBUTE_UNINITIALIZED is defined, and to nothing otherwise. */

#if defined(HAVE_ATTRIBUTE_UNINITIALIZED)
#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
#else
#define PCRE2_KEEP_UNINITIALIZED
#endif
89*9f2d0395SChristoph M. Becker 
/* Older versions of MSVC lack snprintf(). For MSVC compilers with _MSC_VER
below 1900 the name is mapped onto _snprintf, which allows for
warning/error-free compilation and testing with MSVC compilers back to at least
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */

#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif
97a5bc5aedSAnatol Belski 
/* When compiling a DLL for Windows, the exported symbols have to be declared
using some MS magic. I found some useful information on this web page:
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
information there, using __declspec(dllexport) without "extern" we have a
definition; with "extern" we have a declaration. The settings here override the
setting in pcre2.h (which is included below); it defines only PCRE2_EXP_DECL,
which is all that is needed for applications (they just import the symbols). We
use:

  PCRE2_EXP_DECL    for declarations
  PCRE2_EXP_DEFN    for definitions

The reason for wrapping this in #ifndef PCRE2_EXP_DECL is so that pcre2test,
which is an application, but needs to import this file in order to "peek" at
internals, can #include pcre2.h first to get an application's-eye view.

In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
special-purpose environments) might want to stick other stuff in front of
exported symbols. That's why, in the non-Windows case, we set PCRE2_EXP_DEFN
only if it is not already set.

The three cases selected below are:

  Windows, DLL build (PCRE2_STATIC not defined)   dllexport declarations
  Windows, static build (PCRE2_STATIC defined)    plain extern, empty DEFN
  everything else                                 extern (extern "C" for C++)
*/

#ifndef PCRE2_EXP_DECL
#  if defined(_WIN32) && !defined(PCRE2_STATIC)
#    define PCRE2_EXP_DECL       extern __declspec(dllexport)
#    define PCRE2_EXP_DEFN       __declspec(dllexport)
#  elif defined(_WIN32)
#    define PCRE2_EXP_DECL       extern
#    define PCRE2_EXP_DEFN
#  else
#    ifdef __cplusplus
#      define PCRE2_EXP_DECL     extern "C"
#    else
#      define PCRE2_EXP_DECL     extern
#    endif
#    ifndef PCRE2_EXP_DEFN
#      define PCRE2_EXP_DEFN     PCRE2_EXP_DECL
#    endif
#  endif
#endif
139e623e535SAndrei Zmievski 
140a5bc5aedSAnatol Belski /* Include the public PCRE2 header and the definitions of UCP character
141a5bc5aedSAnatol Belski property values. This must follow the setting of PCRE2_EXP_DECL above. */
142c778b3ebSNuno Lopes 
143a5bc5aedSAnatol Belski #include "pcre2.h"
144a5bc5aedSAnatol Belski #include "pcre2_ucp.h"
145c778b3ebSNuno Lopes 
/* When PCRE2 is compiled as a C++ library, the subject pointer can be replaced
with a custom type. This makes it possible, for example, to allow pcre2_match()
to process subject strings that are discontinuous by using a smart pointer
class. It must always be possible to inspect all of the subject string in
pcre2_match() because of the way it backtracks.

If CUSTOM_SUBJECT_PTR is defined, any existing PCRE2_SPTR definition is dropped
and PCRE2_SPTR is redefined as CUSTOM_SUBJECT_PTR. */

/* WARNING: This is as yet untested for PCRE2. */

#if defined(CUSTOM_SUBJECT_PTR)
#undef PCRE2_SPTR
#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
#endif
158e623e535SAndrei Zmievski 
/* When checking for integer overflow in pcre2_compile(), we need to handle
large integers. If a 64-bit integer type is available, we can use that.
Otherwise we have to cast to double, which of course requires floating point
arithmetic. Handle this by defining a macro for the appropriate type: int64_t
when INT64_MAX or int64_t is visible as a macro, double otherwise. */

#if defined(INT64_MAX) || defined(int64_t)
#define INT64_OR_DOUBLE int64_t
#else
#define INT64_OR_DOUBLE double
#endif
16950016d9dSIlia Alshanetsky 
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. The default definition
pastes the prefix "_pcre2_" onto the given name. */

#if !defined(PRIV)
#define PRIV(name) _pcre2_##name
#endif
179d918e077SAnatol Belski 
/* When compiling for use with the Virtual Pascal compiler, these functions
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
option on the command line.

Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
a macro that calls an emulating function, referenced as PRIV(memmove). This is
done only when HAVE_MEMMOVE is not defined. */

#if defined(VPCOMPAT)
#define strlen(s)        _strlen(s)
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
#define memcmp(s,c,n)    _memcmp(s,c,n)
#define memcpy(d,s,n)    _memcpy(d,s,n)
#define memmove(d,s,n)   _memmove(d,s,n)
#define memset(s,c,n)    _memset(s,c,n)
#elif !defined(HAVE_MEMMOVE)
#undef  memmove          /* Some systems may have a macro */
#define memmove(a, b, c) PRIV(memmove)(a, b, c)
#endif   /* VPCOMPAT / not HAVE_MEMMOVE */
201e623e535SAndrei Zmievski 
/* This is an unsigned int value that no UTF character can ever have, as
Unicode doesn't go beyond 0x0010ffff. */

#define NOTACHAR 0xffffffff

/* This is the largest valid UTF/Unicode code point. */

#define MAX_UTF_CODE_POINT 0x10ffff

/* Compile-time positive error numbers (all except UTF errors, which are
negative) start at this value. It should probably never be changed, in case
some application is checking for specific numbers. There is a copy of this
#define in pcre2posix.c (which now no longer includes this file). Ideally, a
way of having a single definition should be found, but as the number is
unlikely to change, this is not a pressing issue. The original reason for
having a base other than 0 was to keep the absolute values of compile-time and
run-time error numbers numerically different, but in the event the code does
not rely on this. */

#define COMPILE_ERROR_BASE 100

/* The initial frames vector for remembering backtracking points in
pcre2_match() is allocated on the system stack, of this size (bytes). The size
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
on the number of capturing parentheses) so 20KiB handles quite a few frames. A
larger vector on the heap is obtained for patterns that need more frames. The
maximum size of this can be limited. */

#define START_FRAMES_SIZE 20480

/* Similarly, for DFA matching, an initial internal workspace vector is
allocated on the stack. */

#define DFA_START_RWS_SIZE 30720

/* Define the default BSR convention: PCRE2_BSR_ANYCRLF when BSR_ANYCRLF is
defined, PCRE2_BSR_UNICODE otherwise. */

#ifdef BSR_ANYCRLF
#define BSR_DEFAULT PCRE2_BSR_ANYCRLF
#else
#define BSR_DEFAULT PCRE2_BSR_UNICODE
#endif
245e623e535SAndrei Zmievski 
246357ab3cbSAnatol Belski 
/* ---------------- Basic UTF-8 macros ---------------- */

/* These UTF-8 macros are always defined because they are used in pcre2test for
handling wide characters in 16-bit and 32-bit modes, even if an 8-bit library
is not supported. */

/* Tests whether a UTF-8 code point needs extra bytes to decode: true when the
first code unit c is 0xc0 or above. */

#define HASUTF8EXTRALEN(c) ((c) >= 0xc0)
256276c5de0SAnatoliy Belsky 
/* The following macros were originally written in the form of loops that used
data from the tables whose names start with PRIV(utf8_table). They were
rewritten by a user so as not to use loops, because in some environments this
gives a significant performance advantage, and it seems never to do any harm.
*/

/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer.

  c     lvalue holding the first byte on entry; receives the decoded value
  eptr  pointer to the first byte; the following bytes are read from
        eptr[1] onwards and eptr itself is not modified

The bits 0x20, 0x10, 0x08 and 0x04 of the first byte are tested in turn to
choose between the 2-, 3-, 4-, 5- and 6-byte forms. Following bytes are masked
with 0x3f and not otherwise checked.

Both arguments are parenthesized in the expansion, so an expression such as
"p + 1" may be passed for eptr. They are evaluated more than once, so they must
not have side effects. The expansion is a brace block, not do/while(0): do not
use it as the unbraced body of an "if" that is followed by "else". */

#define GETUTF8(c, eptr) \
    { \
    if (((c) & 0x20u) == 0) \
      (c) = (((c) & 0x1fu) << 6) | ((eptr)[1] & 0x3fu); \
    else if (((c) & 0x10u) == 0) \
      (c) = (((c) & 0x0fu) << 12) | (((eptr)[1] & 0x3fu) << 6) | \
            ((eptr)[2] & 0x3fu); \
    else if (((c) & 0x08u) == 0) \
      (c) = (((c) & 0x07u) << 18) | (((eptr)[1] & 0x3fu) << 12) | \
            (((eptr)[2] & 0x3fu) << 6) | ((eptr)[3] & 0x3fu); \
    else if (((c) & 0x04u) == 0) \
      (c) = (((c) & 0x03u) << 24) | (((eptr)[1] & 0x3fu) << 18) | \
            (((eptr)[2] & 0x3fu) << 12) | (((eptr)[3] & 0x3fu) << 6) | \
            ((eptr)[4] & 0x3fu); \
    else \
      (c) = (((c) & 0x01u) << 30) | (((eptr)[1] & 0x3fu) << 24) | \
            (((eptr)[2] & 0x3fu) << 18) | (((eptr)[3] & 0x3fu) << 12) | \
            (((eptr)[4] & 0x3fu) << 6) | ((eptr)[5] & 0x3fu); \
    }
28434c6447eSIlia Alshanetsky 
/* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
the pointer.

  c     lvalue holding the first byte on entry; receives the decoded value
  eptr  modifiable pointer lvalue; on entry it addresses the byte after the
        first byte, on exit it has been advanced past the bytes consumed
        (1 to 5 of them, chosen by the bits of the first byte)

Following bytes are masked with 0x3f and not otherwise checked.

Both arguments are parenthesized in the expansion, so an lvalue expression such
as "*pp" may be passed for eptr. They are evaluated more than once, so they
must not have side effects. The expansion is a brace block, not do/while(0): do
not use it as the unbraced body of an "if" that is followed by "else". */

#define GETUTF8INC(c, eptr) \
    { \
    if (((c) & 0x20u) == 0) \
      (c) = (((c) & 0x1fu) << 6) | (*(eptr)++ & 0x3fu); \
    else if (((c) & 0x10u) == 0) \
      { \
      (c) = (((c) & 0x0fu) << 12) | ((*(eptr) & 0x3fu) << 6) | \
            ((eptr)[1] & 0x3fu); \
      (eptr) += 2; \
      } \
    else if (((c) & 0x08u) == 0) \
      { \
      (c) = (((c) & 0x07u) << 18) | ((*(eptr) & 0x3fu) << 12) | \
            (((eptr)[1] & 0x3fu) << 6) | ((eptr)[2] & 0x3fu); \
      (eptr) += 3; \
      } \
    else if (((c) & 0x04u) == 0) \
      { \
      (c) = (((c) & 0x03u) << 24) | ((*(eptr) & 0x3fu) << 18) | \
            (((eptr)[1] & 0x3fu) << 12) | (((eptr)[2] & 0x3fu) << 6) | \
            ((eptr)[3] & 0x3fu); \
      (eptr) += 4; \
      } \
    else \
      { \
      (c) = (((c) & 0x01u) << 30) | ((*(eptr) & 0x3fu) << 24) | \
            (((eptr)[1] & 0x3fu) << 18) | (((eptr)[2] & 0x3fu) << 12) | \
            (((eptr)[3] & 0x3fu) << 6) | ((eptr)[4] & 0x3fu); \
      (eptr) += 5; \
      } \
    }
318e623e535SAndrei Zmievski 
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer, incrementing the length.

  c     lvalue holding the first byte on entry; receives the decoded value
  eptr  pointer to the first byte; the following bytes are read from
        eptr[1] onwards and eptr itself is not modified
  len   integer lvalue; increased by the number of following bytes used
        (1 to 5, chosen by the bits of the first byte)

Following bytes are masked with 0x3f and not otherwise checked.

All arguments are parenthesized in the expansion, so expressions such as
"p + 1" may be passed for eptr. They are evaluated more than once, so they must
not have side effects. The expansion is a brace block, not do/while(0): do not
use it as the unbraced body of an "if" that is followed by "else". */

#define GETUTF8LEN(c, eptr, len) \
    { \
    if (((c) & 0x20u) == 0) \
      { \
      (c) = (((c) & 0x1fu) << 6) | ((eptr)[1] & 0x3fu); \
      (len)++; \
      } \
    else if (((c) & 0x10u) == 0) \
      { \
      (c) = (((c) & 0x0fu) << 12) | (((eptr)[1] & 0x3fu) << 6) | \
            ((eptr)[2] & 0x3fu); \
      (len) += 2; \
      } \
    else if (((c) & 0x08u) == 0) \
      { \
      (c) = (((c) & 0x07u) << 18) | (((eptr)[1] & 0x3fu) << 12) | \
            (((eptr)[2] & 0x3fu) << 6) | ((eptr)[3] & 0x3fu); \
      (len) += 3; \
      } \
    else if (((c) & 0x04u) == 0) \
      { \
      (c) = (((c) & 0x03u) << 24) | (((eptr)[1] & 0x3fu) << 18) | \
            (((eptr)[2] & 0x3fu) << 12) | (((eptr)[3] & 0x3fu) << 6) | \
            ((eptr)[4] & 0x3fu); \
      (len) += 4; \
      } \
    else \
      { \
      (c) = (((c) & 0x01u) << 30) | (((eptr)[1] & 0x3fu) << 24) | \
            (((eptr)[2] & 0x3fu) << 18) | (((eptr)[3] & 0x3fu) << 12) | \
            (((eptr)[4] & 0x3fu) << 6) | ((eptr)[5] & 0x3fu); \
      (len) += 5; \
      } \
    }
355e623e535SAndrei Zmievski 
/* --------------- Whitespace macros ---------------- */

/* Tests for Unicode horizontal and vertical whitespace characters must check a
number of different values. Using a switch statement for this generates the
fastest code (no loop, no memory access), and there are several places in the
interpreter code where this happens. In order to ensure that all the case lists
remain in step, we use macros so that there is only one place where the lists
are defined.

These values are also required as lists in pcre2_compile.c when processing \h,
\H, \v and \V in a character class. The lists are defined in pcre2_tables.c,
but macros that define the values are here so that all the definitions are
together. The lists must be in ascending character order, terminated by
NOTACHAR (which is 0xffffffff).

The *_CASES macros expand to a run of "case" labels whose final label has no
trailing colon, so a use site writes, for example, "HSPACE_CASES:".

Any changes should ensure that the various macros are kept in step with each
other. NOTE: The values also appear in pcre2_jit_compile.c. */

/* -------------- ASCII/Unicode environments -------------- */

#ifndef EBCDIC

/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
space. However, in many other sources it is listed as a space and has been in
PCRE (both APIs) for a long time. */

#define HSPACE_LIST \
  CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
  0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
  0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x205f, 0x3000, \
  NOTACHAR

#define HSPACE_MULTIBYTE_CASES \
  case 0x1680:  /* OGHAM SPACE MARK */ \
  case 0x180e:  /* MONGOLIAN VOWEL SEPARATOR */ \
  case 0x2000:  /* EN QUAD */ \
  case 0x2001:  /* EM QUAD */ \
  case 0x2002:  /* EN SPACE */ \
  case 0x2003:  /* EM SPACE */ \
  case 0x2004:  /* THREE-PER-EM SPACE */ \
  case 0x2005:  /* FOUR-PER-EM SPACE */ \
  case 0x2006:  /* SIX-PER-EM SPACE */ \
  case 0x2007:  /* FIGURE SPACE */ \
  case 0x2008:  /* PUNCTUATION SPACE */ \
  case 0x2009:  /* THIN SPACE */ \
  case 0x200a:  /* HAIR SPACE */ \
  case 0x202f:  /* NARROW NO-BREAK SPACE */ \
  case 0x205f:  /* MEDIUM MATHEMATICAL SPACE */ \
  case 0x3000   /* IDEOGRAPHIC SPACE */

#define HSPACE_BYTE_CASES \
  case CHAR_HT: \
  case CHAR_SPACE: \
  case CHAR_NBSP

#define HSPACE_CASES \
  HSPACE_BYTE_CASES: \
  HSPACE_MULTIBYTE_CASES

#define VSPACE_LIST \
  CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR

#define VSPACE_MULTIBYTE_CASES \
  case 0x2028:    /* LINE SEPARATOR */ \
  case 0x2029     /* PARAGRAPH SEPARATOR */

#define VSPACE_BYTE_CASES \
  case CHAR_LF: \
  case CHAR_VT: \
  case CHAR_FF: \
  case CHAR_CR: \
  case CHAR_NEL

#define VSPACE_CASES \
  VSPACE_BYTE_CASES: \
  VSPACE_MULTIBYTE_CASES

/* -------------- EBCDIC environments -------------- */

#else
#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR

#define HSPACE_BYTE_CASES \
  case CHAR_HT: \
  case CHAR_SPACE: \
  case CHAR_NBSP

#define HSPACE_CASES HSPACE_BYTE_CASES

/* The order of CHAR_LF and CHAR_NEL in the list depends on EBCDIC_NL25. */

#ifdef EBCDIC_NL25
#define VSPACE_LIST \
  CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
#else
#define VSPACE_LIST \
  CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
#endif

#define VSPACE_BYTE_CASES \
  case CHAR_LF: \
  case CHAR_VT: \
  case CHAR_FF: \
  case CHAR_CR: \
  case CHAR_NEL

#define VSPACE_CASES VSPACE_BYTE_CASES
#endif  /* EBCDIC */

/* -------------- End of whitespace macros -------------- */
465e623e535SAndrei Zmievski 
466e623e535SAndrei Zmievski 
/* PCRE2 is able to support several different kinds of newline (CR, LF, CRLF,
"any" and "anycrlf" at present). The following macros are used to package up
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
modules to indicate in which datablock the parameters exist, and what the
start/end of string field names are. */

#define NLTYPE_FIXED    0     /* Newline is a fixed length string */
#define NLTYPE_ANY      1     /* Newline is any Unicode line ending */
#define NLTYPE_ANYCRLF  2     /* Newline is CR, LF, or CRLF */
476b3e66c61SNuno Lopes 
/* This macro checks for a newline at the given position.

  p   pointer to the position to test

When NLBLOCK->nltype is not NLTYPE_FIXED, the test is delegated to
PRIV(is_newline), provided p is before NLBLOCK->PSEND. Otherwise the code units
at p are compared with the one or two units in NLBLOCK->nl, provided that
NLBLOCK->nllen units remain before NLBLOCK->PSEND.

NLBLOCK, PSEND, UCHAR21TEST and a variable named "utf" must be in scope at the
point of use. The argument is parenthesized everywhere in the expansion
(including the "(p)+1" lookahead) and is evaluated more than once, so it must
not have side effects. */

#define IS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) < NLBLOCK->PSEND && \
     PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
     UCHAR21TEST(p) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || UCHAR21TEST((p)+1) == NLBLOCK->nl[1]) \
    ) \
  )
490e623e535SAndrei Zmievski 
/* This macro checks for a newline immediately preceding the given position.

  p   pointer to the position to test

When NLBLOCK->nltype is not NLTYPE_FIXED, the test is delegated to
PRIV(was_newline), provided p is after NLBLOCK->PSSTART. Otherwise the
NLBLOCK->nllen code units that end at p are compared with the one or two units
in NLBLOCK->nl, provided that many units exist after NLBLOCK->PSSTART.

NLBLOCK, PSSTART, UCHAR21TEST and a variable named "utf" must be in scope at
the point of use. The argument is parenthesized everywhere in the expansion
(including the "(p) - nllen" look-behind) and is evaluated more than once, so
it must not have side effects. */

#define WAS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) > NLBLOCK->PSSTART && \
     PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
       &(NLBLOCK->nllen), utf)) \
    : \
    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
     UCHAR21TEST((p) - NLBLOCK->nllen) == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || \
      UCHAR21TEST((p) - NLBLOCK->nllen + 1) == NLBLOCK->nl[1]) \
    ) \
  )
504357ab3cbSAnatol Belski 
/* Private flags containing information about the compiled pattern. The first
three must not be changed, because whichever is set is actually the number of
bytes in a code unit in that mode. Each flag is a distinct single bit; the
value 0x00000008 is not assigned in this list. */

#define PCRE2_MODE8         0x00000001  /* compiled in 8 bit mode */
#define PCRE2_MODE16        0x00000002  /* compiled in 16 bit mode */
#define PCRE2_MODE32        0x00000004  /* compiled in 32 bit mode */
#define PCRE2_FIRSTSET      0x00000010  /* first_code unit is set */
#define PCRE2_FIRSTCASELESS 0x00000020  /* caseless first code unit */
#define PCRE2_FIRSTMAPSET   0x00000040  /* bitmap of first code units is set */
#define PCRE2_LASTSET       0x00000080  /* last code unit is set */
#define PCRE2_LASTCASELESS  0x00000100  /* caseless last code unit */
#define PCRE2_STARTLINE     0x00000200  /* start after \n for multiline */
#define PCRE2_JCHANGED      0x00000400  /* j option used in pattern */
#define PCRE2_HASCRORLF     0x00000800  /* explicit \r or \n in pattern */
#define PCRE2_HASTHEN       0x00001000  /* pattern contains (*THEN) */
#define PCRE2_MATCH_EMPTY   0x00002000  /* pattern can match empty string */
#define PCRE2_BSR_SET       0x00004000  /* BSR was set in the pattern */
#define PCRE2_NL_SET        0x00008000  /* newline was set in the pattern */
#define PCRE2_NOTEMPTY_SET  0x00010000  /* (*NOTEMPTY) used        ) keep */
#define PCRE2_NE_ATST_SET   0x00020000  /* (*NOTEMPTY_ATSTART) used) together */
#define PCRE2_DEREF_TABLES  0x00040000  /* release character tables */
#define PCRE2_NOJIT         0x00080000  /* (*NOJIT) used */
#define PCRE2_HASBKPORX     0x00100000  /* contains \P, \p, or \X */
#define PCRE2_DUPCAPUSED    0x00200000  /* contains (?| */
#define PCRE2_HASBKC        0x00400000  /* contains \C */
#define PCRE2_HASACCEPT     0x00800000  /* contains (*ACCEPT) */

/* Mask that selects the three code unit width bits defined above. */

#define PCRE2_MODE_MASK     (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
534a5bc5aedSAnatol Belski 
/* Values for the matchedby field in a match data block. No explicit values
are given, so the enumerators are 0, 1 and 2 in the order listed. */

enum { PCRE2_MATCHEDBY_INTERPRETER,     /* pcre2_match() */
       PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
       PCRE2_MATCHEDBY_JIT };           /* pcre2_jit_match() */
540e623e535SAndrei Zmievski 
/* Values for the flags field in a match data block. The constant is unsigned
(note the "u" suffix), so bitwise operations on it do not involve a signed
type. */

#define PCRE2_MD_COPIED_SUBJECT  0x01u
544aa9433e9SAnatol Belski 
/* Magic number to provide a small check against being handed junk. The four
bytes, most significant first, are the ASCII codes of 'P', 'C', 'R', 'E'. */

#define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
548e623e535SAndrei Zmievski 
/* The maximum remaining length of subject we are prepared to search for a
req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
much faster than the search loop that has to be used in 16-bit and 32-bit
modes, which is why the 8-bit limit is the larger one. The selection depends on
PCRE2_CODE_UNIT_WIDTH, which must be defined before this point. */

#if PCRE2_CODE_UNIT_WIDTH == 8
#define REQ_CU_MAX       5000
#else
#define REQ_CU_MAX       2000
#endif
559276c5de0SAnatoliy Belsky 
/* Offsets for the bitmap tables in the cbits set of tables. Each table
contains a set of bits for a class map. Some classes are built by combining
these tables. Consecutive offsets are 32 apart, and cbit_length is the offset
just past the last table (10 tables of 32). */

#define cbit_space     0      /* [:space:] or \s */
#define cbit_xdigit   32      /* [:xdigit:] */
#define cbit_digit    64      /* [:digit:] or \d */
#define cbit_upper    96      /* [:upper:] */
#define cbit_lower   128      /* [:lower:] */
#define cbit_word    160      /* [:word:] or \w */
#define cbit_graph   192      /* [:graph:] */
#define cbit_print   224      /* [:print:] */
#define cbit_punct   256      /* [:punct:] */
#define cbit_cntrl   288      /* [:cntrl:] */
#define cbit_length  320      /* Length of the cbits table */
575e623e535SAndrei Zmievski 
/* Bit definitions for entries in the ctypes table. Do not change these values
without checking pcre2_jit_compile.c, which has an assertion to ensure that
ctype_word has the value 16. Each value is a distinct single bit, so several
may be ORed together in one table entry. */

#define ctype_space    0x01
#define ctype_letter   0x02
#define ctype_lcletter 0x04
#define ctype_digit    0x08
#define ctype_word     0x10    /* alphanumeric or '_' */
585e623e535SAndrei Zmievski 
/* Offsets of the various tables from the base tables pointer, and
total length of the tables. ctypes_offset is derived from cbit_length, which
must therefore be defined before ctypes_offset or TABLES_LENGTH is used. */

#define lcc_offset      0                           /* Lower case */
#define fcc_offset    256                           /* Flip case */
#define cbits_offset  512                           /* Character classes */
#define ctypes_offset (cbits_offset + cbit_length)  /* Character types */
#define TABLES_LENGTH (ctypes_offset + 256)         /* Total length */
594e623e535SAndrei Zmievski 
595a5bc5aedSAnatol Belski 
/* -------------------- Character and string names ------------------------ */

/* If PCRE2 is to support UTF-8 on EBCDIC platforms, we cannot use normal
character constants like '*' because the compiler would emit their EBCDIC code,
which is different from their ASCII/UTF-8 code. Instead we define macros for
the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
is enabled. When UTF-8 support is not enabled, the definitions use character
literals. Both character and string versions of each character are needed, and
there are some longer strings as well.

This means that, on EBCDIC platforms, the PCRE2 library can handle either
EBCDIC, or UTF-8, but not both. To support both in the same compiled library
would need different lookups depending on whether PCRE2_UTF was set or not.
This would make it impossible to use characters in switch/case statements,
which would reduce performance. For a theoretical use (which nobody has asked
for) in a minority area (EBCDIC platforms), this is not sensible. Any
application that did need both could compile two versions of the library, using
macros to give the functions distinct names. */

615a5bc5aedSAnatol Belski #ifndef SUPPORT_UNICODE
61690a2d197SNuno Lopes 
/* UTF-8 support is not enabled; use the platform-dependent character literals
so that PCRE2 works in both ASCII and EBCDIC environments, but only in non-UTF
mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
characters, a common practice has been to use its NL (0x15) character as the
line terminator in C-like processing environments. However, sometimes the LF
(0x25) character is used instead, according to this Unicode document:

http://unicode.org/standard/reports/tr13/tr13-5.html

PCRE2 defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
instead. Whichever is *not* chosen is defined as NEL.

In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
same code point. */

#ifdef EBCDIC

/* EBCDIC_NL25 is the build-time switch that swaps the NL and NEL codes. */

#ifndef EBCDIC_NL25
#define CHAR_NL                     '\x15'
#define CHAR_NEL                    '\x25'
#define STR_NL                      "\x15"
#define STR_NEL                     "\x25"
#else
#define CHAR_NL                     '\x25'
#define CHAR_NEL                    '\x15'
#define STR_NL                      "\x25"
#define STR_NEL                     "\x15"
#endif

#define CHAR_LF                     CHAR_NL
#define STR_LF                      STR_NL

#define CHAR_ESC                    '\047'
#define CHAR_DEL                    '\007'
#define CHAR_NBSP                   ((unsigned char)'\x41')
#define STR_ESC                     "\047"
#define STR_DEL                     "\007"

#else  /* Not EBCDIC */

/* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
compatibility. NEL is the Unicode newline character; make sure it is
a positive value. The same cast keeps CHAR_NBSP positive where plain char is
signed. */

#define CHAR_LF                     '\n'
#define CHAR_NL                     CHAR_LF
#define CHAR_NEL                    ((unsigned char)'\x85')
#define CHAR_ESC                    '\033'
#define CHAR_DEL                    '\177'
#define CHAR_NBSP                   ((unsigned char)'\xa0')

#define STR_LF                      "\n"
#define STR_NL                      STR_LF
#define STR_NEL                     "\x85"
#define STR_ESC                     "\033"
#define STR_DEL                     "\177"

#endif  /* EBCDIC */
675357ab3cbSAnatol Belski 
/* The remaining definitions work in both environments. They are ordinary
character constants, so each one takes whatever code the compiler's execution
character set assigns to that character. */

#define CHAR_NUL                    '\0'
#define CHAR_HT                     '\t'
#define CHAR_VT                     '\v'
#define CHAR_FF                     '\f'
#define CHAR_CR                     '\r'
#define CHAR_BS                     '\b'
#define CHAR_BEL                    '\a'

#define CHAR_SPACE                  ' '
#define CHAR_EXCLAMATION_MARK       '!'
#define CHAR_QUOTATION_MARK         '"'
#define CHAR_NUMBER_SIGN            '#'
#define CHAR_DOLLAR_SIGN            '$'
#define CHAR_PERCENT_SIGN           '%'
#define CHAR_AMPERSAND              '&'
#define CHAR_APOSTROPHE             '\''
#define CHAR_LEFT_PARENTHESIS       '('
#define CHAR_RIGHT_PARENTHESIS      ')'
#define CHAR_ASTERISK               '*'
#define CHAR_PLUS                   '+'
#define CHAR_COMMA                  ','
#define CHAR_MINUS                  '-'
#define CHAR_DOT                    '.'
#define CHAR_SLASH                  '/'
#define CHAR_0                      '0'
#define CHAR_1                      '1'
#define CHAR_2                      '2'
#define CHAR_3                      '3'
#define CHAR_4                      '4'
#define CHAR_5                      '5'
#define CHAR_6                      '6'
#define CHAR_7                      '7'
#define CHAR_8                      '8'
#define CHAR_9                      '9'
#define CHAR_COLON                  ':'
#define CHAR_SEMICOLON              ';'
#define CHAR_LESS_THAN_SIGN         '<'
#define CHAR_EQUALS_SIGN            '='
#define CHAR_GREATER_THAN_SIGN      '>'
#define CHAR_QUESTION_MARK          '?'
#define CHAR_COMMERCIAL_AT          '@'
#define CHAR_A                      'A'
#define CHAR_B                      'B'
#define CHAR_C                      'C'
#define CHAR_D                      'D'
#define CHAR_E                      'E'
#define CHAR_F                      'F'
#define CHAR_G                      'G'
#define CHAR_H                      'H'
#define CHAR_I                      'I'
#define CHAR_J                      'J'
#define CHAR_K                      'K'
#define CHAR_L                      'L'
#define CHAR_M                      'M'
#define CHAR_N                      'N'
#define CHAR_O                      'O'
#define CHAR_P                      'P'
#define CHAR_Q                      'Q'
#define CHAR_R                      'R'
#define CHAR_S                      'S'
#define CHAR_T                      'T'
#define CHAR_U                      'U'
#define CHAR_V                      'V'
#define CHAR_W                      'W'
#define CHAR_X                      'X'
#define CHAR_Y                      'Y'
#define CHAR_Z                      'Z'
#define CHAR_LEFT_SQUARE_BRACKET    '['
#define CHAR_BACKSLASH              '\\'
#define CHAR_RIGHT_SQUARE_BRACKET   ']'
#define CHAR_CIRCUMFLEX_ACCENT      '^'
#define CHAR_UNDERSCORE             '_'
#define CHAR_GRAVE_ACCENT           '`'
#define CHAR_a                      'a'
#define CHAR_b                      'b'
#define CHAR_c                      'c'
#define CHAR_d                      'd'
#define CHAR_e                      'e'
#define CHAR_f                      'f'
#define CHAR_g                      'g'
#define CHAR_h                      'h'
#define CHAR_i                      'i'
#define CHAR_j                      'j'
#define CHAR_k                      'k'
#define CHAR_l                      'l'
#define CHAR_m                      'm'
#define CHAR_n                      'n'
#define CHAR_o                      'o'
#define CHAR_p                      'p'
#define CHAR_q                      'q'
#define CHAR_r                      'r'
#define CHAR_s                      's'
#define CHAR_t                      't'
#define CHAR_u                      'u'
#define CHAR_v                      'v'
#define CHAR_w                      'w'
#define CHAR_x                      'x'
#define CHAR_y                      'y'
#define CHAR_z                      'z'
#define CHAR_LEFT_CURLY_BRACKET     '{'
#define CHAR_VERTICAL_LINE          '|'
#define CHAR_RIGHT_CURLY_BRACKET    '}'
#define CHAR_TILDE                  '~'
78190a2d197SNuno Lopes 
/* String versions of the characters defined above. Each is a string literal
holding exactly one character, so several can be written next to each other
and the compiler will join them into one longer literal. */

#define STR_HT                      "\t"
#define STR_VT                      "\v"
#define STR_FF                      "\f"
#define STR_CR                      "\r"
#define STR_BS                      "\b"
#define STR_BEL                     "\a"

#define STR_SPACE                   " "
#define STR_EXCLAMATION_MARK        "!"
#define STR_QUOTATION_MARK          "\""
#define STR_NUMBER_SIGN             "#"
#define STR_DOLLAR_SIGN             "$"
#define STR_PERCENT_SIGN            "%"
#define STR_AMPERSAND               "&"
#define STR_APOSTROPHE              "'"
#define STR_LEFT_PARENTHESIS        "("
#define STR_RIGHT_PARENTHESIS       ")"
#define STR_ASTERISK                "*"
#define STR_PLUS                    "+"
#define STR_COMMA                   ","
#define STR_MINUS                   "-"
#define STR_DOT                     "."
#define STR_SLASH                   "/"
#define STR_0                       "0"
#define STR_1                       "1"
#define STR_2                       "2"
#define STR_3                       "3"
#define STR_4                       "4"
#define STR_5                       "5"
#define STR_6                       "6"
#define STR_7                       "7"
#define STR_8                       "8"
#define STR_9                       "9"
#define STR_COLON                   ":"
#define STR_SEMICOLON               ";"
#define STR_LESS_THAN_SIGN          "<"
#define STR_EQUALS_SIGN             "="
#define STR_GREATER_THAN_SIGN       ">"
#define STR_QUESTION_MARK           "?"
#define STR_COMMERCIAL_AT           "@"
#define STR_A                       "A"
#define STR_B                       "B"
#define STR_C                       "C"
#define STR_D                       "D"
#define STR_E                       "E"
#define STR_F                       "F"
#define STR_G                       "G"
#define STR_H                       "H"
#define STR_I                       "I"
#define STR_J                       "J"
#define STR_K                       "K"
#define STR_L                       "L"
#define STR_M                       "M"
#define STR_N                       "N"
#define STR_O                       "O"
#define STR_P                       "P"
#define STR_Q                       "Q"
#define STR_R                       "R"
#define STR_S                       "S"
#define STR_T                       "T"
#define STR_U                       "U"
#define STR_V                       "V"
#define STR_W                       "W"
#define STR_X                       "X"
#define STR_Y                       "Y"
#define STR_Z                       "Z"
#define STR_LEFT_SQUARE_BRACKET     "["
#define STR_BACKSLASH               "\\"
#define STR_RIGHT_SQUARE_BRACKET    "]"
#define STR_CIRCUMFLEX_ACCENT       "^"
#define STR_UNDERSCORE              "_"
#define STR_GRAVE_ACCENT            "`"
#define STR_a                       "a"
#define STR_b                       "b"
#define STR_c                       "c"
#define STR_d                       "d"
#define STR_e                       "e"
#define STR_f                       "f"
#define STR_g                       "g"
#define STR_h                       "h"
#define STR_i                       "i"
#define STR_j                       "j"
#define STR_k                       "k"
#define STR_l                       "l"
#define STR_m                       "m"
#define STR_n                       "n"
#define STR_o                       "o"
#define STR_p                       "p"
#define STR_q                       "q"
#define STR_r                       "r"
#define STR_s                       "s"
#define STR_t                       "t"
#define STR_u                       "u"
#define STR_v                       "v"
#define STR_w                       "w"
#define STR_x                       "x"
#define STR_y                       "y"
#define STR_z                       "z"
#define STR_LEFT_CURLY_BRACKET      "{"
#define STR_VERTICAL_LINE           "|"
#define STR_RIGHT_CURLY_BRACKET     "}"
#define STR_TILDE                   "~"
88490a2d197SNuno Lopes 
/* Names of the backtracking control verbs. A macro whose name ends in 0 has
an explicit "\0" as the last character of its literal, in addition to the
terminator the compiler appends; the final name in the group (STRING_THEN) has
only the compiler's terminator. Presumably this lets the literals be written
side by side to form one NUL-separated list - confirm at the point of use. */

#define STRING_ACCEPT0               "ACCEPT\0"
#define STRING_COMMIT0               "COMMIT\0"
#define STRING_F0                    "F\0"
#define STRING_FAIL0                 "FAIL\0"
#define STRING_MARK0                 "MARK\0"
#define STRING_PRUNE0                "PRUNE\0"
#define STRING_SKIP0                 "SKIP\0"
#define STRING_THEN                  "THEN"
893aa9433e9SAnatol Belski 
/* Short and long alphabetic names for atomic groups, lookaround assertions
and script runs. As elsewhere in this file, a macro whose name ends in 0 has an
explicit "\0" at the end of its literal, while the final name in the group
(STRING_atomic_script_run) does not. */

#define STRING_atomic0               "atomic\0"
#define STRING_pla0                  "pla\0"
#define STRING_plb0                  "plb\0"
#define STRING_napla0                "napla\0"
#define STRING_naplb0                "naplb\0"
#define STRING_nla0                  "nla\0"
#define STRING_nlb0                  "nlb\0"
#define STRING_sr0                   "sr\0"
#define STRING_asr0                  "asr\0"
#define STRING_positive_lookahead0   "positive_lookahead\0"
#define STRING_positive_lookbehind0  "positive_lookbehind\0"
#define STRING_non_atomic_positive_lookahead0   "non_atomic_positive_lookahead\0"
#define STRING_non_atomic_positive_lookbehind0  "non_atomic_positive_lookbehind\0"
#define STRING_negative_lookahead0   "negative_lookahead\0"
#define STRING_negative_lookbehind0  "negative_lookbehind\0"
#define STRING_script_run0           "script_run\0"
#define STRING_atomic_script_run     "atomic_script_run"
911aa9433e9SAnatol Belski 
/* Names of the POSIX-style character classes. Every literal except the last
(STRING_xdigit) ends with an explicit "\0", following the "0" suffix convention
used for the other name lists in this file. */

#define STRING_alpha0                "alpha\0"
#define STRING_lower0                "lower\0"
#define STRING_upper0                "upper\0"
#define STRING_alnum0                "alnum\0"
#define STRING_ascii0                "ascii\0"
#define STRING_blank0                "blank\0"
#define STRING_cntrl0                "cntrl\0"
#define STRING_digit0                "digit\0"
#define STRING_graph0                "graph\0"
#define STRING_print0                "print\0"
#define STRING_punct0                "punct\0"
#define STRING_space0                "space\0"
#define STRING_word0                 "word\0"
#define STRING_xdigit                "xdigit"
926aa9433e9SAnatol Belski 
/* Miscellaneous fixed strings: the words DEFINE and VERSION, and the bracketed
forms [:<:]] and [:>:]] (named "start word" and "end word" by the macro names).
Note that the last two literals end with two closing square brackets. */

#define STRING_DEFINE                "DEFINE"
#define STRING_VERSION               "VERSION"
#define STRING_WEIRD_STARTWORD       "[:<:]]"
#define STRING_WEIRD_ENDWORD         "[:>:]]"
93123cb7bd5SAnatol Belski 
/* Option and limit names. The _RIGHTPAR strings include the closing
parenthesis that follows the name, and the _EQ strings include the "=" that
follows it; STRING_MARK is the bare word. */

#define STRING_CR_RIGHTPAR                "CR)"
#define STRING_LF_RIGHTPAR                "LF)"
#define STRING_CRLF_RIGHTPAR              "CRLF)"
#define STRING_ANY_RIGHTPAR               "ANY)"
#define STRING_ANYCRLF_RIGHTPAR           "ANYCRLF)"
#define STRING_NUL_RIGHTPAR               "NUL)"
#define STRING_BSR_ANYCRLF_RIGHTPAR       "BSR_ANYCRLF)"
#define STRING_BSR_UNICODE_RIGHTPAR       "BSR_UNICODE)"
#define STRING_UTF8_RIGHTPAR              "UTF8)"
#define STRING_UTF16_RIGHTPAR             "UTF16)"
#define STRING_UTF32_RIGHTPAR             "UTF32)"
#define STRING_UTF_RIGHTPAR               "UTF)"
#define STRING_UCP_RIGHTPAR               "UCP)"
#define STRING_NO_AUTO_POSSESS_RIGHTPAR   "NO_AUTO_POSSESS)"
#define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR "NO_DOTSTAR_ANCHOR)"
#define STRING_NO_JIT_RIGHTPAR            "NO_JIT)"
#define STRING_NO_START_OPT_RIGHTPAR      "NO_START_OPT)"
#define STRING_NOTEMPTY_RIGHTPAR          "NOTEMPTY)"
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR  "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_HEAP_EQ              "LIMIT_HEAP="
#define STRING_LIMIT_MATCH_EQ             "LIMIT_MATCH="
#define STRING_LIMIT_DEPTH_EQ             "LIMIT_DEPTH="
#define STRING_LIMIT_RECURSION_EQ         "LIMIT_RECURSION="
#define STRING_MARK                       "MARK"
956a5bc5aedSAnatol Belski 
957a5bc5aedSAnatol Belski #else  /* SUPPORT_UNICODE */
95890a2d197SNuno Lopes 
/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8
mode only. */
96290a2d197SNuno Lopes 
96390a2d197SNuno Lopes #define CHAR_HT                     '\011'
96490a2d197SNuno Lopes #define CHAR_VT                     '\013'
96590a2d197SNuno Lopes #define CHAR_FF                     '\014'
96690a2d197SNuno Lopes #define CHAR_CR                     '\015'
967357ab3cbSAnatol Belski #define CHAR_LF                     '\012'
968357ab3cbSAnatol Belski #define CHAR_NL                     CHAR_LF
969357ab3cbSAnatol Belski #define CHAR_NEL                    ((unsigned char)'\x85')
97090a2d197SNuno Lopes #define CHAR_BS                     '\010'
97190a2d197SNuno Lopes #define CHAR_BEL                    '\007'
97290a2d197SNuno Lopes #define CHAR_ESC                    '\033'
97390a2d197SNuno Lopes #define CHAR_DEL                    '\177'
97490a2d197SNuno Lopes 
/* NUL, then the printable ASCII characters in code-point order (octal 040 to
176), then NBSP. As with the control characters, every value is an explicit
escape, so the numeric code is fixed by this table. CHAR_NBSP is above 0x7f and
is cast to unsigned char so that its value is 0xa0 even where plain char is
signed. */
975a5bc5aedSAnatol Belski #define CHAR_NUL                    '\0'
97690a2d197SNuno Lopes #define CHAR_SPACE                  '\040'
97790a2d197SNuno Lopes #define CHAR_EXCLAMATION_MARK       '\041'
97890a2d197SNuno Lopes #define CHAR_QUOTATION_MARK         '\042'
97990a2d197SNuno Lopes #define CHAR_NUMBER_SIGN            '\043'
98090a2d197SNuno Lopes #define CHAR_DOLLAR_SIGN            '\044'
98190a2d197SNuno Lopes #define CHAR_PERCENT_SIGN           '\045'
98290a2d197SNuno Lopes #define CHAR_AMPERSAND              '\046'
98390a2d197SNuno Lopes #define CHAR_APOSTROPHE             '\047'
98490a2d197SNuno Lopes #define CHAR_LEFT_PARENTHESIS       '\050'
98590a2d197SNuno Lopes #define CHAR_RIGHT_PARENTHESIS      '\051'
98690a2d197SNuno Lopes #define CHAR_ASTERISK               '\052'
98790a2d197SNuno Lopes #define CHAR_PLUS                   '\053'
98890a2d197SNuno Lopes #define CHAR_COMMA                  '\054'
98990a2d197SNuno Lopes #define CHAR_MINUS                  '\055'
99090a2d197SNuno Lopes #define CHAR_DOT                    '\056'
99190a2d197SNuno Lopes #define CHAR_SLASH                  '\057'
99290a2d197SNuno Lopes #define CHAR_0                      '\060'
99390a2d197SNuno Lopes #define CHAR_1                      '\061'
99490a2d197SNuno Lopes #define CHAR_2                      '\062'
99590a2d197SNuno Lopes #define CHAR_3                      '\063'
99690a2d197SNuno Lopes #define CHAR_4                      '\064'
99790a2d197SNuno Lopes #define CHAR_5                      '\065'
99890a2d197SNuno Lopes #define CHAR_6                      '\066'
99990a2d197SNuno Lopes #define CHAR_7                      '\067'
100090a2d197SNuno Lopes #define CHAR_8                      '\070'
100190a2d197SNuno Lopes #define CHAR_9                      '\071'
100290a2d197SNuno Lopes #define CHAR_COLON                  '\072'
100390a2d197SNuno Lopes #define CHAR_SEMICOLON              '\073'
100490a2d197SNuno Lopes #define CHAR_LESS_THAN_SIGN         '\074'
100590a2d197SNuno Lopes #define CHAR_EQUALS_SIGN            '\075'
100690a2d197SNuno Lopes #define CHAR_GREATER_THAN_SIGN      '\076'
100790a2d197SNuno Lopes #define CHAR_QUESTION_MARK          '\077'
100890a2d197SNuno Lopes #define CHAR_COMMERCIAL_AT          '\100'
100990a2d197SNuno Lopes #define CHAR_A                      '\101'
101090a2d197SNuno Lopes #define CHAR_B                      '\102'
101190a2d197SNuno Lopes #define CHAR_C                      '\103'
101290a2d197SNuno Lopes #define CHAR_D                      '\104'
101390a2d197SNuno Lopes #define CHAR_E                      '\105'
101490a2d197SNuno Lopes #define CHAR_F                      '\106'
101590a2d197SNuno Lopes #define CHAR_G                      '\107'
101690a2d197SNuno Lopes #define CHAR_H                      '\110'
101790a2d197SNuno Lopes #define CHAR_I                      '\111'
101890a2d197SNuno Lopes #define CHAR_J                      '\112'
101990a2d197SNuno Lopes #define CHAR_K                      '\113'
102090a2d197SNuno Lopes #define CHAR_L                      '\114'
102190a2d197SNuno Lopes #define CHAR_M                      '\115'
102290a2d197SNuno Lopes #define CHAR_N                      '\116'
102390a2d197SNuno Lopes #define CHAR_O                      '\117'
102490a2d197SNuno Lopes #define CHAR_P                      '\120'
102590a2d197SNuno Lopes #define CHAR_Q                      '\121'
102690a2d197SNuno Lopes #define CHAR_R                      '\122'
102790a2d197SNuno Lopes #define CHAR_S                      '\123'
102890a2d197SNuno Lopes #define CHAR_T                      '\124'
102990a2d197SNuno Lopes #define CHAR_U                      '\125'
103090a2d197SNuno Lopes #define CHAR_V                      '\126'
103190a2d197SNuno Lopes #define CHAR_W                      '\127'
103290a2d197SNuno Lopes #define CHAR_X                      '\130'
103390a2d197SNuno Lopes #define CHAR_Y                      '\131'
103490a2d197SNuno Lopes #define CHAR_Z                      '\132'
103590a2d197SNuno Lopes #define CHAR_LEFT_SQUARE_BRACKET    '\133'
103690a2d197SNuno Lopes #define CHAR_BACKSLASH              '\134'
103790a2d197SNuno Lopes #define CHAR_RIGHT_SQUARE_BRACKET   '\135'
103890a2d197SNuno Lopes #define CHAR_CIRCUMFLEX_ACCENT      '\136'
103990a2d197SNuno Lopes #define CHAR_UNDERSCORE             '\137'
104090a2d197SNuno Lopes #define CHAR_GRAVE_ACCENT           '\140'
104190a2d197SNuno Lopes #define CHAR_a                      '\141'
104290a2d197SNuno Lopes #define CHAR_b                      '\142'
104390a2d197SNuno Lopes #define CHAR_c                      '\143'
104490a2d197SNuno Lopes #define CHAR_d                      '\144'
104590a2d197SNuno Lopes #define CHAR_e                      '\145'
104690a2d197SNuno Lopes #define CHAR_f                      '\146'
104790a2d197SNuno Lopes #define CHAR_g                      '\147'
104890a2d197SNuno Lopes #define CHAR_h                      '\150'
104990a2d197SNuno Lopes #define CHAR_i                      '\151'
105090a2d197SNuno Lopes #define CHAR_j                      '\152'
105190a2d197SNuno Lopes #define CHAR_k                      '\153'
105290a2d197SNuno Lopes #define CHAR_l                      '\154'
105390a2d197SNuno Lopes #define CHAR_m                      '\155'
105490a2d197SNuno Lopes #define CHAR_n                      '\156'
105590a2d197SNuno Lopes #define CHAR_o                      '\157'
105690a2d197SNuno Lopes #define CHAR_p                      '\160'
105790a2d197SNuno Lopes #define CHAR_q                      '\161'
105890a2d197SNuno Lopes #define CHAR_r                      '\162'
105990a2d197SNuno Lopes #define CHAR_s                      '\163'
106090a2d197SNuno Lopes #define CHAR_t                      '\164'
106190a2d197SNuno Lopes #define CHAR_u                      '\165'
106290a2d197SNuno Lopes #define CHAR_v                      '\166'
106390a2d197SNuno Lopes #define CHAR_w                      '\167'
106490a2d197SNuno Lopes #define CHAR_x                      '\170'
106590a2d197SNuno Lopes #define CHAR_y                      '\171'
106690a2d197SNuno Lopes #define CHAR_z                      '\172'
106790a2d197SNuno Lopes #define CHAR_LEFT_CURLY_BRACKET     '\173'
106890a2d197SNuno Lopes #define CHAR_VERTICAL_LINE          '\174'
106990a2d197SNuno Lopes #define CHAR_RIGHT_CURLY_BRACKET    '\175'
107090a2d197SNuno Lopes #define CHAR_TILDE                  '\176'
1071ca02d9c2SStanislav Malyshev #define CHAR_NBSP                   ((unsigned char)'\xa0')
107290a2d197SNuno Lopes 
/* One-character string-literal forms of the control characters, using the same
octal codes as the CHAR_ constants. The linefeed is available only as STR_NL;
this list has no STR_ form for CHAR_LF, CHAR_NEL or CHAR_NUL. */
107390a2d197SNuno Lopes #define STR_HT                      "\011"
107490a2d197SNuno Lopes #define STR_VT                      "\013"
107590a2d197SNuno Lopes #define STR_FF                      "\014"
107690a2d197SNuno Lopes #define STR_CR                      "\015"
107790a2d197SNuno Lopes #define STR_NL                      "\012"
107890a2d197SNuno Lopes #define STR_BS                      "\010"
107990a2d197SNuno Lopes #define STR_BEL                     "\007"
108090a2d197SNuno Lopes #define STR_ESC                     "\033"
108190a2d197SNuno Lopes #define STR_DEL                     "\177"
108290a2d197SNuno Lopes 
/* One-character string literals for the printable ASCII characters, in
code-point order (octal 040 to 176). Because each is a string literal, a
sequence of STR_ names written side by side is joined by the compiler into a
single string; the STRING_ macros later in this file are spelled that way. */
108390a2d197SNuno Lopes #define STR_SPACE                   "\040"
108490a2d197SNuno Lopes #define STR_EXCLAMATION_MARK        "\041"
108590a2d197SNuno Lopes #define STR_QUOTATION_MARK          "\042"
108690a2d197SNuno Lopes #define STR_NUMBER_SIGN             "\043"
108790a2d197SNuno Lopes #define STR_DOLLAR_SIGN             "\044"
108890a2d197SNuno Lopes #define STR_PERCENT_SIGN            "\045"
108990a2d197SNuno Lopes #define STR_AMPERSAND               "\046"
109090a2d197SNuno Lopes #define STR_APOSTROPHE              "\047"
109190a2d197SNuno Lopes #define STR_LEFT_PARENTHESIS        "\050"
109290a2d197SNuno Lopes #define STR_RIGHT_PARENTHESIS       "\051"
109390a2d197SNuno Lopes #define STR_ASTERISK                "\052"
109490a2d197SNuno Lopes #define STR_PLUS                    "\053"
109590a2d197SNuno Lopes #define STR_COMMA                   "\054"
109690a2d197SNuno Lopes #define STR_MINUS                   "\055"
109790a2d197SNuno Lopes #define STR_DOT                     "\056"
109890a2d197SNuno Lopes #define STR_SLASH                   "\057"
109990a2d197SNuno Lopes #define STR_0                       "\060"
110090a2d197SNuno Lopes #define STR_1                       "\061"
110190a2d197SNuno Lopes #define STR_2                       "\062"
110290a2d197SNuno Lopes #define STR_3                       "\063"
110390a2d197SNuno Lopes #define STR_4                       "\064"
110490a2d197SNuno Lopes #define STR_5                       "\065"
110590a2d197SNuno Lopes #define STR_6                       "\066"
110690a2d197SNuno Lopes #define STR_7                       "\067"
110790a2d197SNuno Lopes #define STR_8                       "\070"
110890a2d197SNuno Lopes #define STR_9                       "\071"
110990a2d197SNuno Lopes #define STR_COLON                   "\072"
111090a2d197SNuno Lopes #define STR_SEMICOLON               "\073"
111190a2d197SNuno Lopes #define STR_LESS_THAN_SIGN          "\074"
111290a2d197SNuno Lopes #define STR_EQUALS_SIGN             "\075"
111390a2d197SNuno Lopes #define STR_GREATER_THAN_SIGN       "\076"
111490a2d197SNuno Lopes #define STR_QUESTION_MARK           "\077"
111590a2d197SNuno Lopes #define STR_COMMERCIAL_AT           "\100"
111690a2d197SNuno Lopes #define STR_A                       "\101"
111790a2d197SNuno Lopes #define STR_B                       "\102"
111890a2d197SNuno Lopes #define STR_C                       "\103"
111990a2d197SNuno Lopes #define STR_D                       "\104"
112090a2d197SNuno Lopes #define STR_E                       "\105"
112190a2d197SNuno Lopes #define STR_F                       "\106"
112290a2d197SNuno Lopes #define STR_G                       "\107"
112390a2d197SNuno Lopes #define STR_H                       "\110"
112490a2d197SNuno Lopes #define STR_I                       "\111"
112590a2d197SNuno Lopes #define STR_J                       "\112"
112690a2d197SNuno Lopes #define STR_K                       "\113"
112790a2d197SNuno Lopes #define STR_L                       "\114"
112890a2d197SNuno Lopes #define STR_M                       "\115"
112990a2d197SNuno Lopes #define STR_N                       "\116"
113090a2d197SNuno Lopes #define STR_O                       "\117"
113190a2d197SNuno Lopes #define STR_P                       "\120"
113290a2d197SNuno Lopes #define STR_Q                       "\121"
113390a2d197SNuno Lopes #define STR_R                       "\122"
113490a2d197SNuno Lopes #define STR_S                       "\123"
113590a2d197SNuno Lopes #define STR_T                       "\124"
113690a2d197SNuno Lopes #define STR_U                       "\125"
113790a2d197SNuno Lopes #define STR_V                       "\126"
113890a2d197SNuno Lopes #define STR_W                       "\127"
113990a2d197SNuno Lopes #define STR_X                       "\130"
114090a2d197SNuno Lopes #define STR_Y                       "\131"
114190a2d197SNuno Lopes #define STR_Z                       "\132"
114290a2d197SNuno Lopes #define STR_LEFT_SQUARE_BRACKET     "\133"
114390a2d197SNuno Lopes #define STR_BACKSLASH               "\134"
114490a2d197SNuno Lopes #define STR_RIGHT_SQUARE_BRACKET    "\135"
114590a2d197SNuno Lopes #define STR_CIRCUMFLEX_ACCENT       "\136"
114690a2d197SNuno Lopes #define STR_UNDERSCORE              "\137"
114790a2d197SNuno Lopes #define STR_GRAVE_ACCENT            "\140"
114890a2d197SNuno Lopes #define STR_a                       "\141"
114990a2d197SNuno Lopes #define STR_b                       "\142"
115090a2d197SNuno Lopes #define STR_c                       "\143"
115190a2d197SNuno Lopes #define STR_d                       "\144"
115290a2d197SNuno Lopes #define STR_e                       "\145"
115390a2d197SNuno Lopes #define STR_f                       "\146"
115490a2d197SNuno Lopes #define STR_g                       "\147"
115590a2d197SNuno Lopes #define STR_h                       "\150"
115690a2d197SNuno Lopes #define STR_i                       "\151"
115790a2d197SNuno Lopes #define STR_j                       "\152"
115890a2d197SNuno Lopes #define STR_k                       "\153"
115990a2d197SNuno Lopes #define STR_l                       "\154"
116090a2d197SNuno Lopes #define STR_m                       "\155"
116190a2d197SNuno Lopes #define STR_n                       "\156"
116290a2d197SNuno Lopes #define STR_o                       "\157"
116390a2d197SNuno Lopes #define STR_p                       "\160"
116490a2d197SNuno Lopes #define STR_q                       "\161"
116590a2d197SNuno Lopes #define STR_r                       "\162"
116690a2d197SNuno Lopes #define STR_s                       "\163"
116790a2d197SNuno Lopes #define STR_t                       "\164"
116890a2d197SNuno Lopes #define STR_u                       "\165"
116990a2d197SNuno Lopes #define STR_v                       "\166"
117090a2d197SNuno Lopes #define STR_w                       "\167"
117190a2d197SNuno Lopes #define STR_x                       "\170"
117290a2d197SNuno Lopes #define STR_y                       "\171"
117390a2d197SNuno Lopes #define STR_z                       "\172"
117490a2d197SNuno Lopes #define STR_LEFT_CURLY_BRACKET      "\173"
117590a2d197SNuno Lopes #define STR_VERTICAL_LINE           "\174"
117690a2d197SNuno Lopes #define STR_RIGHT_CURLY_BRACKET     "\175"
117790a2d197SNuno Lopes #define STR_TILDE                   "\176"
117890a2d197SNuno Lopes 
/* Upper-case names spelled letter by letter from the STR_ macros. Each macro
whose name ends in 0 appends an explicit NUL (the trailing \0 literal) after its
text; STRING_THEN, the last entry of the group, does not.
NOTE(review): this looks like a layout for pasting the whole group together as
one NUL-separated name table, with the final entry relying on the terminator of
the combined literal - confirm at the place where the macros are used. */
1179aa9433e9SAnatol Belski #define STRING_ACCEPT0               STR_A STR_C STR_C STR_E STR_P STR_T "\0"
1180aa9433e9SAnatol Belski #define STRING_COMMIT0               STR_C STR_O STR_M STR_M STR_I STR_T "\0"
1181aa9433e9SAnatol Belski #define STRING_F0                    STR_F "\0"
1182aa9433e9SAnatol Belski #define STRING_FAIL0                 STR_F STR_A STR_I STR_L "\0"
1183aa9433e9SAnatol Belski #define STRING_MARK0                 STR_M STR_A STR_R STR_K "\0"
1184aa9433e9SAnatol Belski #define STRING_PRUNE0                STR_P STR_R STR_U STR_N STR_E "\0"
1185aa9433e9SAnatol Belski #define STRING_SKIP0                 STR_S STR_K STR_I STR_P "\0"
1186aa9433e9SAnatol Belski #define STRING_THEN                  STR_T STR_H STR_E STR_N
1187aa9433e9SAnatol Belski 
/* Lower-case names built the same way: every macro whose name ends in 0 carries
an explicit trailing NUL, and the last entry (STRING_atomic_script_run) does
not. The group holds both short and long spellings.
NOTE(review): judging by the spelling alone, the short names abbreviate the long
ones (pla / positive_lookahead, plb / positive_lookbehind, napla and naplb / the
non_atomic_ forms, nla and nlb / the negative_ forms, sr / script_run, asr /
atomic_script_run) - confirm against the table that consumes them. */
1188aa9433e9SAnatol Belski #define STRING_atomic0               STR_a STR_t STR_o STR_m STR_i STR_c "\0"
1189aa9433e9SAnatol Belski #define STRING_pla0                  STR_p STR_l STR_a "\0"
1190aa9433e9SAnatol Belski #define STRING_plb0                  STR_p STR_l STR_b "\0"
1191225117afSChristoph M. Becker #define STRING_napla0                STR_n STR_a STR_p STR_l STR_a "\0"
1192225117afSChristoph M. Becker #define STRING_naplb0                STR_n STR_a STR_p STR_l STR_b "\0"
1193aa9433e9SAnatol Belski #define STRING_nla0                  STR_n STR_l STR_a "\0"
1194aa9433e9SAnatol Belski #define STRING_nlb0                  STR_n STR_l STR_b "\0"
1195aa9433e9SAnatol Belski #define STRING_sr0                   STR_s STR_r "\0"
1196aa9433e9SAnatol Belski #define STRING_asr0                  STR_a STR_s STR_r "\0"
1197aa9433e9SAnatol Belski #define STRING_positive_lookahead0   STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
1198aa9433e9SAnatol Belski #define STRING_positive_lookbehind0  STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
1199225117afSChristoph M. Becker #define STRING_non_atomic_positive_lookahead0   STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
1200225117afSChristoph M. Becker #define STRING_non_atomic_positive_lookbehind0  STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
1201aa9433e9SAnatol Belski #define STRING_negative_lookahead0   STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
1202aa9433e9SAnatol Belski #define STRING_negative_lookbehind0  STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
1203aa9433e9SAnatol Belski #define STRING_script_run0           STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
1204aa9433e9SAnatol Belski #define STRING_atomic_script_run     STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
1205aa9433e9SAnatol Belski 
/* Lower-case class names, again with an explicit trailing NUL on every entry
whose macro name ends in 0 and none on the last one (STRING_xdigit). The extra
spaces before the NUL in STRING_word0 only line the four-letter name up with
the five-letter ones; they do not change the expansion.
NOTE(review): the names match the POSIX bracket-expression classes (alpha,
digit, space, ...) plus word - confirm the order against the table of class
name lengths that is expected to accompany this list. */
1206aa9433e9SAnatol Belski #define STRING_alpha0                STR_a STR_l STR_p STR_h STR_a "\0"
1207aa9433e9SAnatol Belski #define STRING_lower0                STR_l STR_o STR_w STR_e STR_r "\0"
1208aa9433e9SAnatol Belski #define STRING_upper0                STR_u STR_p STR_p STR_e STR_r "\0"
1209aa9433e9SAnatol Belski #define STRING_alnum0                STR_a STR_l STR_n STR_u STR_m "\0"
1210aa9433e9SAnatol Belski #define STRING_ascii0                STR_a STR_s STR_c STR_i STR_i "\0"
1211aa9433e9SAnatol Belski #define STRING_blank0                STR_b STR_l STR_a STR_n STR_k "\0"
1212aa9433e9SAnatol Belski #define STRING_cntrl0                STR_c STR_n STR_t STR_r STR_l "\0"
1213aa9433e9SAnatol Belski #define STRING_digit0                STR_d STR_i STR_g STR_i STR_t "\0"
1214aa9433e9SAnatol Belski #define STRING_graph0                STR_g STR_r STR_a STR_p STR_h "\0"
1215aa9433e9SAnatol Belski #define STRING_print0                STR_p STR_r STR_i STR_n STR_t "\0"
1216aa9433e9SAnatol Belski #define STRING_punct0                STR_p STR_u STR_n STR_c STR_t "\0"
1217aa9433e9SAnatol Belski #define STRING_space0                STR_s STR_p STR_a STR_c STR_e "\0"
1218aa9433e9SAnatol Belski #define STRING_word0                 STR_w STR_o STR_r STR_d       "\0"
1219aa9433e9SAnatol Belski #define STRING_xdigit                STR_x STR_d STR_i STR_g STR_i STR_t
1220aa9433e9SAnatol Belski 
/* Stand-alone strings with no embedded NUL: the words DEFINE and VERSION, and
two six-character sequences. STRING_WEIRD_STARTWORD expands to the characters
[ : < : ] ] and STRING_WEIRD_ENDWORD to [ : > : ] ] (spaces added here only for
readability). */
1221aa9433e9SAnatol Belski #define STRING_DEFINE                STR_D STR_E STR_F STR_I STR_N STR_E
1222aa9433e9SAnatol Belski #define STRING_VERSION               STR_V STR_E STR_R STR_S STR_I STR_O STR_N
1223aa9433e9SAnatol Belski #define STRING_WEIRD_STARTWORD       STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
1224aa9433e9SAnatol Belski #define STRING_WEIRD_ENDWORD         STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
122523cb7bd5SAnatol Belski 
/* Upper-case names that include their own terminating punctuation: every
..._RIGHTPAR macro ends with a closing parenthesis and every ..._EQ macro ends
with an equals sign. None of them embeds a NUL. STRING_MARK is the same four
letters as STRING_MARK0 but without the trailing NUL.
NOTE(review): these look like the setting names accepted in the form (*NAME) or
(*NAME=value) at the start of a pattern - confirm in pcre2_compile.c. */
1226a5bc5aedSAnatol Belski #define STRING_CR_RIGHTPAR                STR_C STR_R STR_RIGHT_PARENTHESIS
1227a5bc5aedSAnatol Belski #define STRING_LF_RIGHTPAR                STR_L STR_F STR_RIGHT_PARENTHESIS
1228a5bc5aedSAnatol Belski #define STRING_CRLF_RIGHTPAR              STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1229a5bc5aedSAnatol Belski #define STRING_ANY_RIGHTPAR               STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
1230a5bc5aedSAnatol Belski #define STRING_ANYCRLF_RIGHTPAR           STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1231a5bc5aedSAnatol Belski #define STRING_NUL_RIGHTPAR               STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
1232a5bc5aedSAnatol Belski #define STRING_BSR_ANYCRLF_RIGHTPAR       STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1233a5bc5aedSAnatol Belski #define STRING_BSR_UNICODE_RIGHTPAR       STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
1234a5bc5aedSAnatol Belski #define STRING_UTF8_RIGHTPAR              STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
1235a5bc5aedSAnatol Belski #define STRING_UTF16_RIGHTPAR             STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
1236a5bc5aedSAnatol Belski #define STRING_UTF32_RIGHTPAR             STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
1237a5bc5aedSAnatol Belski #define STRING_UTF_RIGHTPAR               STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
1238a5bc5aedSAnatol Belski #define STRING_UCP_RIGHTPAR               STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
1239a5bc5aedSAnatol Belski #define STRING_NO_AUTO_POSSESS_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
1240a5bc5aedSAnatol Belski #define STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_D STR_O STR_T STR_S STR_T STR_A STR_R STR_UNDERSCORE STR_A STR_N STR_C STR_H STR_O STR_R STR_RIGHT_PARENTHESIS
1241a5bc5aedSAnatol Belski #define STRING_NO_JIT_RIGHTPAR            STR_N STR_O STR_UNDERSCORE STR_J STR_I STR_T STR_RIGHT_PARENTHESIS
1242a5bc5aedSAnatol Belski #define STRING_NO_START_OPT_RIGHTPAR      STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
1243a5bc5aedSAnatol Belski #define STRING_NOTEMPTY_RIGHTPAR          STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
1244a5bc5aedSAnatol Belski #define STRING_NOTEMPTY_ATSTART_RIGHTPAR  STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
1245a5bc5aedSAnatol Belski #define STRING_LIMIT_HEAP_EQ              STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
1246a5bc5aedSAnatol Belski #define STRING_LIMIT_MATCH_EQ             STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
1247a5bc5aedSAnatol Belski #define STRING_LIMIT_DEPTH_EQ             STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
1248a5bc5aedSAnatol Belski #define STRING_LIMIT_RECURSION_EQ         STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
1249a5bc5aedSAnatol Belski #define STRING_MARK                       STR_M STR_A STR_R STR_K
1250a5bc5aedSAnatol Belski 
1251a5bc5aedSAnatol Belski #endif  /* SUPPORT_UNICODE */
1252a5bc5aedSAnatol Belski 
1253a5bc5aedSAnatol Belski /* -------------------- End of character and string names -------------------*/
1254a5bc5aedSAnatol Belski 
1255a5bc5aedSAnatol Belski /* -------------------- Definitions for compiled patterns -------------------*/
1256e623e535SAndrei Zmievski 
125789a40541SAndrei Zmievski /* Codes for different types of Unicode property */
125889a40541SAndrei Zmievski 
/* PT_ANY to PT_UCNC are numbered consecutively from 0 to 10. PT_TABSIZE is not
a property code itself: it is one more than the largest code above, i.e. the
number of codes in this list, and is described as the size of the square table
used by the autopossessify tests. */
125989a40541SAndrei Zmievski #define PT_ANY        0    /* Any property - matches all chars */
126089a40541SAndrei Zmievski #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
1261c5602787SIlia Alshanetsky #define PT_GC         2    /* Specified general characteristic (e.g. L) */
1262c5602787SIlia Alshanetsky #define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
126389a40541SAndrei Zmievski #define PT_SC         4    /* Script (e.g. Han) */
1264c5602787SIlia Alshanetsky #define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
1265c5602787SIlia Alshanetsky #define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
1266c5602787SIlia Alshanetsky #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
1267c5602787SIlia Alshanetsky #define PT_WORD       8    /* Word - L plus N plus underscore */
1268357ab3cbSAnatol Belski #define PT_CLIST      9    /* Pseudo-property: match character list */
126923cb7bd5SAnatol Belski #define PT_UCNC      10    /* Universal Character nameable character */
127023cb7bd5SAnatol Belski #define PT_TABSIZE   11    /* Size of square table for autopossessify tests */
127123cb7bd5SAnatol Belski 
127223cb7bd5SAnatol Belski /* The following special properties are used only in XCLASS items, when POSIX
1273a5bc5aedSAnatol Belski classes are specified and PCRE2_UCP is set - in other words, for Unicode
127423cb7bd5SAnatol Belski handling of these classes. They are not available via the \p or \P escapes like
127523cb7bd5SAnatol Belski those in the above list, and so they do not take part in the autopossessifying
127623cb7bd5SAnatol Belski table. */
127723cb7bd5SAnatol Belski 
/* These continue the numbering after PT_UCNC (10). PT_PXGRAPH therefore has the
same numeric value as PT_TABSIZE (11); that overlap is consistent with these
three codes lying outside the autopossessify table. */
127823cb7bd5SAnatol Belski #define PT_PXGRAPH   11    /* [:graph:] - characters that mark the paper */
127923cb7bd5SAnatol Belski #define PT_PXPRINT   12    /* [:print:] - [:graph:] plus non-control spaces */
128023cb7bd5SAnatol Belski #define PT_PXPUNCT   13    /* [:punct:] - punctuation characters */
128189a40541SAndrei Zmievski 
128289a40541SAndrei Zmievski /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
1283276c5de0SAnatoliy Belsky contain characters with values greater than 255. */
128489a40541SAndrei Zmievski 
/* Flag values. Each is a distinct single bit (0x01, 0x02, 0x04), so any
combination of them fits in one flags value.
NOTE(review): presumably these live in the flags field of an OP_XCLASS item -
confirm in the code that builds and reads extended classes. */
1285dd0e96ccSDmitry Stogov #define XCL_NOT       0x01    /* Flag: this is a negative class */
1286dd0e96ccSDmitry Stogov #define XCL_MAP       0x02    /* Flag: a 32-byte map is present */
1287dd0e96ccSDmitry Stogov #define XCL_HASPROP   0x04    /* Flag: property checks are present. */
128889a40541SAndrei Zmievski 
/* Item type codes, numbered consecutively from 0. Unlike the XCL_ flag values
these are plain enumeration-style codes, not a bit mask; each comment says what
data follows an item of that type. */
128989a40541SAndrei Zmievski #define XCL_END       0    /* Marks end of individual items */
129089a40541SAndrei Zmievski #define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
129189a40541SAndrei Zmievski #define XCL_RANGE     2    /* A range (two multibyte chars) follows */
129289a40541SAndrei Zmievski #define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
129389a40541SAndrei Zmievski #define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
129489a40541SAndrei Zmievski 
1295e623e535SAndrei Zmievski /* These are escaped items that aren't just an encoding of a particular data
129623cb7bd5SAnatol Belski value such as \n. They must have non-zero values, as check_escape() returns 0
1297a5bc5aedSAnatol Belski for a data character. In the escapes[] table in pcre2_compile.c their values
1298a5bc5aedSAnatol Belski are negated in order to distinguish them from data values.
1299c5602787SIlia Alshanetsky 
1300a5bc5aedSAnatol Belski They must appear here in the same order as in the opcode definitions below, up
1301a5bc5aedSAnatol Belski to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL
1302