xref: /PHP-5.3/ext/mbstring/oniguruma/regenc.h (revision 7aab46a2)
1 #ifndef REGENC_H
2 #define REGENC_H
3 /**********************************************************************
4   regenc.h -  Oniguruma (regular expression library)
5 **********************************************************************/
6 /*-
7  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #ifndef RUBY_PLATFORM
33 #include "config.h"
34 #endif
35 #include "oniguruma.h"
36 
37 #if !defined(NULL)
38 #define NULL   ((void *)0)   /* fallback, used only when NULL is not already defined */
39 #endif
40 
41 #if !defined(TRUE)
42 #define TRUE    1            /* fallback, used only when TRUE is not already defined */
43 #endif
44 
45 #if !defined(FALSE)
46 #define FALSE   0            /* fallback, used only when FALSE is not already defined */
47 #endif
48 
49 /* error codes: all negative; note that each macro expands to an unparenthesized "-N" */
50 #define ONIGENCERR_MEMORY                                         -5
51 #define ONIGENCERR_TYPE_BUG                                       -6
52 #define ONIGENCERR_INVALID_WIDE_CHAR_VALUE                      -400
53 #define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE                      -401
54 
55 #define ONIG_IS_NULL(p)                    (((void*)(p)) == (void*)0)
56 #define ONIG_IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)
57 #define ONIG_CHECK_NULL_RETURN(p)          if (ONIG_IS_NULL(p)) return NULL
58 #define ONIG_CHECK_NULL_RETURN_VAL(p,val)  if (ONIG_IS_NULL(p)) return (val)
59 
60 
61 #ifdef ONIG_RUBY_M17N
62 
63 #define ONIG_ENCODING_INIT_DEFAULT            ONIG_ENCODING_UNDEF  /* value used when ONIG_RUBY_M17N is defined; presumably the initial default encoding -- confirm where it is consumed */
64 
65 #else  /* ONIG_RUBY_M17N */
66 
67 #define USE_UNICODE_FULL_RANGE_CTYPE  /* feature switch, defined only when ONIG_RUBY_M17N is not defined */
68 /* The following option must not be used together with USE_CRNL_AS_LINE_TERMINATOR. */
69 /* #define USE_UNICODE_ALL_LINE_TERMINATORS */  /* disabled; see Unicode.org UTS #18 (the note originally read "UTF#18", presumably a typo -- confirm) */
70 
71 #define ONIG_ENCODING_INIT_DEFAULT           ONIG_ENCODING_ASCII  /* value used when ONIG_RUBY_M17N is not defined */
72 
73 /* Internal helpers for encoding implementations. Every prototype passes its parameter list through P_(()), a macro defined outside this header (presumably so pre-ANSI compilers can drop the list -- confirm in oniguruma.h). */
74 ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
75 ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
76 ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
77 ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));  /* "ess_tsett" presumably refers to the German sharp s -- confirm */
78 ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));  /* NOTE(review): ctype is int here but unsigned int in the *_is_code_ctype prototypes */
79 ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
80 
81 /* Methods for single-byte encodings (prototypes only; the definitions are not in this header). */
82 ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
83 ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end));
84 ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
85 ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
86 ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
87 ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
88 ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
89 ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));  /* NOTE(review): returns a non-const UChar* although both inputs are const */
90 ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
91 ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
92 
93 /* Methods for multi-byte encodings (prototypes only); the mbn_* functions and the code_to_mbc / is_code_ctype variants take the encoding as their first argument. */
94 ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
95 ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
96 ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end));
97 ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
98 ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
99 ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
100 ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
101 ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
102 ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
103 ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
104 ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
105 
106 ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));  /* presumably fold matches for "ss" / 0xdf (sharp s) -- confirm in the definition */
107 
108 /* Defined in enc/unicode.c; declared here only when ONIG_RUBY_M17N is not defined. */
109 ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
110 ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
111 
112 
113 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
114   OnigEncISO_8859_1_ToLowerCaseTable[c]  /* direct table lookup; c is used as the index with no range check */
115 #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
116   OnigEncISO_8859_1_ToUpperCaseTable[c]  /* direct table lookup; c is used as the index with no range check */
117 #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
118   ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & (ctype)) != 0)  /* nonzero when any bit of ctype is set for code; ctype is parenthesized so a mask such as A | B is applied as a whole */
119 
120 ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];  /* indexed by ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) */
121 ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];  /* indexed by ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) */
122 ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];  /* bit mask per code, tested by ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) */
123 ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];  /* size not stated here; the definition is outside this header */
124 
125 #endif /* is not ONIG_RUBY_M17N */
126 
127 ONIG_EXTERN int  /* declared after the ONIG_RUBY_M17N conditional, so visible in both configurations */
128 onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ASCII string */, int n));
129 ONIG_EXTERN UChar*  /* NOTE(review): non-const return although p and end are const */
130 onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
131 
132 /* Defined in regexec.c but used by the encoding sources (enc/xxx.c). NOTE(review): declared with plain extern rather than ONIG_EXTERN, unlike the other prototypes here -- confirm this is intentional. */
133 extern int  onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
134 
135 ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding;  /* defined outside this header */
136 ONIG_EXTERN const UChar  OnigEncAsciiToLowerCaseTable[];  /* incomplete array, matching the sibling tables; a pointer declaration would be incompatible with an array definition. Indexed by ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) */
137 ONIG_EXTERN const UChar  OnigEncAsciiToUpperCaseTable[];  /* indexed by ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) */
138 ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];  /* bit mask per code, tested by ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) */
139 
140 #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]  /* direct table lookup; c is used as the index with no range check */
141 #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]  /* direct table lookup; c is used as the index with no range check */
142 #define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
143   ((OnigEncAsciiCtypeTable[code] & (ctype)) != 0)  /* nonzero when any bit of ctype is set for code; ctype is parenthesized so a mask such as A | B is applied as a whole */
144 #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
145     ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))  /* nonzero when code has the UPPER or the LOWER ctype bit */
146 
147 #endif /* REGENC_H */
148